cities, long nameAddress, int number, int hash, int nameLength) {
236 | Result r = new Result(nameAddress, number);
237 | results[hash] = r;
238 | byte[] bytes = new byte[nameLength];
239 | UNSAFE.copyMemory(null, nameAddress, bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, nameLength);
240 | cities.put(new String(bytes, StandardCharsets.UTF_8), r);
241 | }
242 |
243 | private static long[] getSegments(int numberOfChunks) throws IOException {
244 | try (var fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) {
245 | long fileSize = fileChannel.size();
246 | long segmentSize = (fileSize + numberOfChunks - 1) / numberOfChunks;
247 | long[] chunks = new long[numberOfChunks + 1];
248 | long mappedAddress = fileChannel.map(MapMode.READ_ONLY, 0, fileSize, Arena.global()).address();
249 | chunks[0] = mappedAddress;
250 | long endAddress = mappedAddress + fileSize;
251 | for (int i = 1; i < numberOfChunks; ++i) {
252 | long chunkAddress = mappedAddress + i * segmentSize;
253 | // Align to first row start.
254 | while (chunkAddress < endAddress && UNSAFE.getByte(chunkAddress++) != '\n') {
255 | // nop
256 | }
257 | chunks[i] = Math.min(chunkAddress, endAddress);
258 | }
259 | chunks[numberOfChunks] = endAddress;
260 | return chunks;
261 | }
262 | }
263 | }
264 |
--------------------------------------------------------------------------------
/src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v4.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2023 The original authors
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package dev.morling.onebrc;
17 |
18 | import java.io.IOException;
19 | import java.lang.foreign.Arena;
20 | import java.lang.reflect.Field;
21 | import java.nio.channels.FileChannel;
22 | import java.nio.channels.FileChannel.MapMode;
23 | import java.nio.charset.StandardCharsets;
24 | import java.nio.file.Path;
25 | import java.nio.file.StandardOpenOption;
26 | import java.util.HashMap;
27 | import java.util.List;
28 | import java.util.Map;
29 | import java.util.TreeMap;
30 | import java.util.stream.IntStream;
31 |
32 | import sun.misc.Unsafe;
33 |
34 | /**
35 | * Simple solution that memory maps the input file, then splits it into one segment per available core and uses
36 | * sun.misc.Unsafe to directly access the mapped memory. Uses a long at a time when checking for collision.
37 | *
38 | * Runs in 0.70s on my Intel i9-13900K
39 | * Perf stats:
40 | * 40,622,862,783 cpu_core/cycles/
41 | * 48,241,929,925 cpu_atom/cycles/
42 | */
43 | public class CalculateAverage_thomaswue_v4 {
44 | private static final String FILE = "./measurements.txt";
45 |
46 | // Holding the current result for a single city.
47 | private static class Result {
48 | final long nameAddress;
49 | long lastNameLong;
50 | int remainingShift;
51 | int min;
52 | int max;
53 | long sum;
54 | int count;
55 |
56 | private Result(long nameAddress, int value) {
57 | this.nameAddress = nameAddress;
58 | this.min = value;
59 | this.max = value;
60 | this.sum = value;
61 | this.count = 1;
62 | }
63 |
64 | public String toString() {
65 | return round(((double) min) / 10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0);
66 | }
67 |
68 | private static double round(double value) {
69 | return Math.round(value * 10.0) / 10.0;
70 | }
71 |
72 | // Accumulate another result into this one.
73 | private void add(Result other) {
74 | min = Math.min(min, other.min);
75 | max = Math.max(max, other.max);
76 | sum += other.sum;
77 | count += other.count;
78 | }
79 | }
80 |
81 | public static void main(String[] args) throws IOException {
82 | // Calculate input segments.
83 | int numberOfChunks = Runtime.getRuntime().availableProcessors();
84 | long[] chunks = getSegments(numberOfChunks);
85 |
86 | // Parallel processing of segments.
87 | List> allResults = IntStream.range(0, chunks.length - 1).mapToObj(chunkIndex -> {
88 | HashMap cities = HashMap.newHashMap(1 << 10);
89 | parseLoop(chunks[chunkIndex], chunks[chunkIndex + 1], cities);
90 | return cities;
91 | }).parallel().toList();
92 |
93 | // Accumulate results sequentially.
94 | HashMap result = allResults.getFirst();
95 | for (int i = 1; i < allResults.size(); ++i) {
96 | for (Map.Entry entry : allResults.get(i).entrySet()) {
97 | Result current = result.putIfAbsent(entry.getKey(), entry.getValue());
98 | if (current != null) {
99 | current.add(entry.getValue());
100 | }
101 | }
102 | }
103 |
104 | // Final output.
105 | System.out.println(new TreeMap<>(result));
106 | }
107 |
108 | private static final Unsafe UNSAFE = initUnsafe();
109 |
110 | private static Unsafe initUnsafe() {
111 | try {
112 | Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe");
113 | theUnsafe.setAccessible(true);
114 | return (Unsafe) theUnsafe.get(Unsafe.class);
115 | }
116 | catch (NoSuchFieldException | IllegalAccessException e) {
117 | throw new RuntimeException(e);
118 | }
119 | }
120 |
121 | private static void parseLoop(long chunkStart, long chunkEnd, HashMap cities) {
122 | Result[] results = new Result[1 << 18];
123 | long scanPtr = chunkStart;
124 | while (scanPtr < chunkEnd) {
125 | long nameAddress = scanPtr;
126 | long hash = 0;
127 |
128 | // Search for ';', one long at a time.
129 | long word = UNSAFE.getLong(scanPtr);
130 | int pos = findDelimiter(word);
131 | if (pos != 8) {
132 | scanPtr += pos;
133 | word = word & (-1L >>> ((8 - pos - 1) << 3));
134 | hash ^= word;
135 | }
136 | else {
137 | scanPtr += 8;
138 | hash ^= word;
139 | while (true) {
140 | word = UNSAFE.getLong(scanPtr);
141 | pos = findDelimiter(word);
142 | if (pos != 8) {
143 | scanPtr += pos;
144 | word = word & (-1L >>> ((8 - pos - 1) << 3));
145 | hash ^= word;
146 | break;
147 | }
148 | else {
149 | scanPtr += 8;
150 | hash ^= word;
151 | }
152 | }
153 | }
154 |
155 | // Save length of name for later.
156 | int nameLength = (int) (scanPtr - nameAddress);
157 | scanPtr++;
158 |
159 | long numberWord = UNSAFE.getLong(scanPtr);
160 | // The 4th binary digit of the ascii of a digit is 1 while
161 | // that of the '.' is 0. This finds the decimal separator
162 | // The value can be 12, 20, 28
163 | int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000);
164 | int number = convertIntoNumber(decimalSepPos, numberWord);
165 |
166 | // Skip past new line.
167 | // scanPtr++;
168 | scanPtr += (decimalSepPos >>> 3) + 3;
169 |
170 | // Final calculation for index into hash table.
171 | int hashAsInt = (int) (hash ^ (hash >>> 32));
172 | int finalHash = (hashAsInt ^ (hashAsInt >>> 18));
173 | int tableIndex = (finalHash & (results.length - 1));
174 | outer: while (true) {
175 | Result existingResult = results[tableIndex];
176 | if (existingResult == null) {
177 | newEntry(results, cities, nameAddress, number, tableIndex, nameLength);
178 | break;
179 | }
180 | else {
181 | // Check for collision.
182 | int i = 0;
183 | for (; i < nameLength + 1 - 8; i += 8) {
184 | if (UNSAFE.getLong(existingResult.nameAddress + i) != UNSAFE.getLong(nameAddress + i)) {
185 | tableIndex = (tableIndex + 1) & (results.length - 1);
186 | continue outer;
187 | }
188 | }
189 | if (((existingResult.lastNameLong ^ UNSAFE.getLong(nameAddress + i)) << existingResult.remainingShift) == 0) {
190 | existingResult.min = Math.min(existingResult.min, number);
191 | existingResult.max = Math.max(existingResult.max, number);
192 | existingResult.sum += number;
193 | existingResult.count++;
194 | break;
195 | }
196 | else {
197 | // Collision error, try next.
198 | tableIndex = (tableIndex + 1) & (results.length - 1);
199 | }
200 | }
201 | }
202 | }
203 | }
204 |
205 | // Special method to convert a number in the specific format into an int value without branches created by
206 | // Quan Anh Mai.
207 | private static int convertIntoNumber(int decimalSepPos, long numberWord) {
208 | int shift = 28 - decimalSepPos;
209 | // signed is -1 if negative, 0 otherwise
210 | long signed = (~numberWord << 59) >> 63;
211 | long designMask = ~(signed & 0xFF);
212 | // Align the number to a specific position and transform the ascii code
213 | // to actual digit value in each byte
214 | long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L;
215 |
216 | // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit)
217 | // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) =
218 | // 0x000000UU00TTHH00 +
219 | // 0x00UU00TTHH000000 * 10 +
220 | // 0xUU00TTHH00000000 * 100
221 | // Now TT * 100 has 2 trailing zeroes and HH * 100 + TT * 10 + UU < 0x400
222 | // This results in our value lies in the bit 32 to 41 of this product
223 | // That was close :)
224 | long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF;
225 | long value = (absValue ^ signed) - signed;
226 | return (int) value;
227 | }
228 |
229 | private static int findDelimiter(long word) {
230 | long input = word ^ 0x3B3B3B3B3B3B3B3BL;
231 | long tmp = (input - 0x0101010101010101L) & ~input & 0x8080808080808080L;
232 | return Long.numberOfTrailingZeros(tmp) >>> 3;
233 | }
234 |
235 | private static void newEntry(Result[] results, HashMap cities, long nameAddress, int number, int hash, int nameLength) {
236 | Result r = new Result(nameAddress, number);
237 | results[hash] = r;
238 | byte[] bytes = new byte[nameLength];
239 |
240 | int i = 0;
241 | for (; i < nameLength + 1 - 8; i += 8) {
242 | }
243 | r.lastNameLong = UNSAFE.getLong(nameAddress + i);
244 | r.remainingShift = (64 - (nameLength + 1 - i) << 3);
245 | UNSAFE.copyMemory(null, nameAddress, bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, nameLength);
246 | String nameAsString = new String(bytes, StandardCharsets.UTF_8);
247 | cities.put(nameAsString, r);
248 | }
249 |
250 | private static long[] getSegments(int numberOfChunks) throws IOException {
251 | try (var fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) {
252 | long fileSize = fileChannel.size();
253 | long segmentSize = (fileSize + numberOfChunks - 1) / numberOfChunks;
254 | long[] chunks = new long[numberOfChunks + 1];
255 | long mappedAddress = fileChannel.map(MapMode.READ_ONLY, 0, fileSize, Arena.global()).address();
256 | chunks[0] = mappedAddress;
257 | long endAddress = mappedAddress + fileSize;
258 | for (int i = 1; i < numberOfChunks; ++i) {
259 | long chunkAddress = mappedAddress + i * segmentSize;
260 | // Align to first row start.
261 | while (chunkAddress < endAddress && UNSAFE.getByte(chunkAddress++) != '\n') {
262 | // nop
263 | }
264 | chunks[i] = Math.min(chunkAddress, endAddress);
265 | }
266 | chunks[numberOfChunks] = endAddress;
267 | return chunks;
268 | }
269 | }
270 | }
271 |
--------------------------------------------------------------------------------
/src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v5.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2023 The original authors
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package dev.morling.onebrc;
17 |
18 | import java.io.IOException;
19 | import java.lang.foreign.Arena;
20 | import java.lang.reflect.Field;
21 | import java.nio.channels.FileChannel;
22 | import java.nio.channels.FileChannel.MapMode;
23 | import java.nio.charset.StandardCharsets;
24 | import java.nio.file.Path;
25 | import java.nio.file.StandardOpenOption;
26 | import java.util.ArrayList;
27 | import java.util.List;
28 | import java.util.TreeMap;
29 | import java.util.stream.IntStream;
30 |
31 | import sun.misc.Unsafe;
32 |
33 | /**
34 | * Simple solution that memory maps the input file, then splits it into one segment per available core and uses
35 | * sun.misc.Unsafe to directly access the mapped memory. Uses a long at a time when checking for collision.
36 | *
37 | * Runs in 0.66s on my Intel i9-13900K
38 | * Perf stats:
39 | * 35,935,262,091 cpu_core/cycles/
40 | * 47,305,591,173 cpu_atom/cycles/
41 | */
42 | public class CalculateAverage_thomaswue_v5 {
43 | private static final String FILE = "./measurements.txt";
44 |
45 | // Holding the current result for a single city.
46 | private static class Result {
47 | long lastNameLong, secondLastNameLong, nameAddress;
48 | int nameLength, remainingShift;
49 | int min, max, count;
50 | long sum;
51 |
52 | private Result(long nameAddress) {
53 | this.nameAddress = nameAddress;
54 | this.min = Integer.MAX_VALUE;
55 | this.max = Integer.MIN_VALUE;
56 | }
57 |
58 | public String toString() {
59 | return round(((double) min) / 10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0);
60 | }
61 |
62 | private static double round(double value) {
63 | return Math.round(value * 10.0) / 10.0;
64 | }
65 |
66 | // Accumulate another result into this one.
67 | private void add(Result other) {
68 | min = Math.min(min, other.min);
69 | max = Math.max(max, other.max);
70 | sum += other.sum;
71 | count += other.count;
72 | }
73 |
74 | public String calcName() {
75 | return new Scanner(nameAddress, nameAddress + nameLength).getString(nameLength);
76 | }
77 | }
78 |
79 | public static void main(String[] args) throws IOException {
80 | // Calculate input segments.
81 | int numberOfChunks = Runtime.getRuntime().availableProcessors();
82 | long[] chunks = getSegments(numberOfChunks);
83 |
84 | // Parallel processing of segments.
85 | List> allResults = IntStream.range(0, chunks.length - 1).mapToObj(chunkIndex -> parseLoop(chunks[chunkIndex], chunks[chunkIndex + 1]))
86 | .map(resultArray -> {
87 | List results = new ArrayList<>();
88 | for (Result r : resultArray) {
89 | if (r != null) {
90 | results.add(r);
91 | }
92 | }
93 | return results;
94 | }).parallel().toList();
95 |
96 | // Final output.
97 | System.out.println(accumulateResults(allResults));
98 | }
99 |
100 | // Accumulate results sequentially for simplicity.
101 | private static TreeMap accumulateResults(List> allResults) {
102 | TreeMap result = new TreeMap<>();
103 | for (List resultArr : allResults) {
104 | for (Result r : resultArr) {
105 | String name = r.calcName();
106 | Result current = result.putIfAbsent(name, r);
107 | if (current != null) {
108 | current.add(r);
109 | }
110 | }
111 | }
112 | return result;
113 | }
114 |
115 | // Main parse loop.
116 | private static Result[] parseLoop(long chunkStart, long chunkEnd) {
117 | Result[] results = new Result[1 << 18];
118 | Scanner scanner = new Scanner(chunkStart, chunkEnd);
119 | while (scanner.hasNext()) {
120 | long nameAddress = scanner.pos();
121 | long hash = 0;
122 |
123 | // Search for ';', one long at a time.
124 | long word = scanner.getLong();
125 | int pos = findDelimiter(word);
126 | if (pos != 8) {
127 | scanner.add(pos);
128 | word = mask(word, pos);
129 | hash ^= word;
130 |
131 | Result existingResult = results[hashToIndex(hash, results)];
132 | if (existingResult != null && existingResult.lastNameLong == word) {
133 | scanAndRecord(scanner, existingResult);
134 | continue;
135 | }
136 | }
137 | else {
138 | scanner.add(8);
139 | hash ^= word;
140 | long prevWord = word;
141 | word = scanner.getLong();
142 | pos = findDelimiter(word);
143 | if (pos != 8) {
144 | scanner.add(pos);
145 | word = mask(word, pos);
146 | hash ^= word;
147 | Result existingResult = results[hashToIndex(hash, results)];
148 | if (existingResult != null && existingResult.lastNameLong == word && existingResult.secondLastNameLong == prevWord) {
149 | scanAndRecord(scanner, existingResult);
150 | continue;
151 | }
152 | }
153 | else {
154 | scanner.add(8);
155 | hash ^= word;
156 | while (true) {
157 | word = scanner.getLong();
158 | pos = findDelimiter(word);
159 | if (pos != 8) {
160 | scanner.add(pos);
161 | word = mask(word, pos);
162 | hash ^= word;
163 | break;
164 | }
165 | else {
166 | scanner.add(8);
167 | hash ^= word;
168 | }
169 | }
170 | }
171 | }
172 |
173 | // Save length of name for later.
174 | int nameLength = (int) (scanner.pos() - nameAddress);
175 | scanner.add(1);
176 |
177 | long numberWord = scanner.getLong();
178 | int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000);
179 | int number = convertIntoNumber(decimalSepPos, numberWord);
180 | scanner.add((decimalSepPos >>> 3) + 3);
181 |
182 | // Final calculation for index into hash table.
183 | int tableIndex = hashToIndex(hash, results);
184 | outer: while (true) {
185 | Result existingResult = results[tableIndex];
186 | if (existingResult == null) {
187 | existingResult = newEntry(results, nameAddress, tableIndex, nameLength, scanner);
188 | }
189 | // Check for collision.
190 | int i = 0;
191 | for (; i < nameLength + 1 - 8; i += 8) {
192 | if (scanner.getLongAt(existingResult.nameAddress + i) != scanner.getLongAt(nameAddress + i)) {
193 | tableIndex = (tableIndex + 1) & (results.length - 1);
194 | continue outer;
195 | }
196 | }
197 | if (((existingResult.lastNameLong ^ scanner.getLongAt(nameAddress + i)) << existingResult.remainingShift) == 0) {
198 | record(existingResult, number);
199 | break;
200 | }
201 | else {
202 | // Collision error, try next.
203 | tableIndex = (tableIndex + 1) & (results.length - 1);
204 | }
205 | }
206 | }
207 | return results;
208 | }
209 |
210 | private static void scanAndRecord(Scanner scanPtr, Result existingResult) {
211 | scanPtr.add(1);
212 | long numberWord = scanPtr.getLong();
213 | int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000);
214 | int number = convertIntoNumber(decimalSepPos, numberWord);
215 | scanPtr.add((decimalSepPos >>> 3) + 3);
216 | record(existingResult, number);
217 | }
218 |
219 | private static void record(Result existingResult, int number) {
220 | existingResult.min = Math.min(existingResult.min, number);
221 | existingResult.max = Math.max(existingResult.max, number);
222 | existingResult.sum += number;
223 | existingResult.count++;
224 | }
225 |
226 | private static int hashToIndex(long hash, Result[] results) {
227 | int hashAsInt = (int) (hash ^ (hash >>> 32));
228 | int finalHash = (hashAsInt ^ (hashAsInt >>> 18));
229 | return (finalHash & (results.length - 1));
230 | }
231 |
232 | private static long mask(long word, int pos) {
233 | return word & (-1L >>> ((8 - pos - 1) << 3));
234 | }
235 |
236 | // Special method to convert a number in the specific format into an int value without branches created by
237 | // Quan Anh Mai.
238 | private static int convertIntoNumber(int decimalSepPos, long numberWord) {
239 | int shift = 28 - decimalSepPos;
240 | // signed is -1 if negative, 0 otherwise
241 | long signed = (~numberWord << 59) >> 63;
242 | long designMask = ~(signed & 0xFF);
243 | // Align the number to a specific position and transform the ascii code
244 | // to actual digit value in each byte
245 | long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L;
246 |
247 | // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit)
248 | // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) =
249 | // 0x000000UU00TTHH00 +
250 | // 0x00UU00TTHH000000 * 10 +
251 | // 0xUU00TTHH00000000 * 100
252 | // Now TT * 100 has 2 trailing zeroes and HH * 100 + TT * 10 + UU < 0x400
253 | // This results in our value lies in the bit 32 to 41 of this product
254 | // That was close :)
255 | long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF;
256 | long value = (absValue ^ signed) - signed;
257 | return (int) value;
258 | }
259 |
260 | private static int findDelimiter(long word) {
261 | long input = word ^ 0x3B3B3B3B3B3B3B3BL;
262 | long tmp = (input - 0x0101010101010101L) & ~input & 0x8080808080808080L;
263 | return Long.numberOfTrailingZeros(tmp) >>> 3;
264 | }
265 |
266 | private static Result newEntry(Result[] results, long nameAddress, int hash, int nameLength, Scanner scanner) {
267 | Result r = new Result(nameAddress);
268 | results[hash] = r;
269 |
270 | int i = 0;
271 | for (; i < nameLength + 1 - 8; i += 8) {
272 | r.secondLastNameLong = (scanner.getLongAt(nameAddress + i));
273 | }
274 | r.remainingShift = (64 - (nameLength + 1 - i) << 3);
275 | r.lastNameLong = (scanner.getLongAt(nameAddress + i) & (-1L >>> r.remainingShift));
276 | r.nameLength = nameLength;
277 | return r;
278 | }
279 |
280 | private static long[] getSegments(int numberOfChunks) throws IOException {
281 | try (var fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) {
282 | long fileSize = fileChannel.size();
283 | long segmentSize = (fileSize + numberOfChunks - 1) / numberOfChunks;
284 | long[] chunks = new long[numberOfChunks + 1];
285 | long mappedAddress = fileChannel.map(MapMode.READ_ONLY, 0, fileSize, Arena.global()).address();
286 | chunks[0] = mappedAddress;
287 | long endAddress = mappedAddress + fileSize;
288 | Scanner s = new Scanner(mappedAddress, mappedAddress + fileSize);
289 | for (int i = 1; i < numberOfChunks; ++i) {
290 | long chunkAddress = mappedAddress + i * segmentSize;
291 | // Align to first row start.
292 | while (chunkAddress < endAddress && (s.getLongAt(chunkAddress++) & 0xFF) != '\n') {
293 | // nop
294 | }
295 | chunks[i] = Math.min(chunkAddress, endAddress);
296 | }
297 | chunks[numberOfChunks] = endAddress;
298 | return chunks;
299 | }
300 | }
301 |
302 | private static class Scanner {
303 |
304 | private static final Unsafe UNSAFE = initUnsafe();
305 |
306 | private static Unsafe initUnsafe() {
307 | try {
308 | Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe");
309 | theUnsafe.setAccessible(true);
310 | return (Unsafe) theUnsafe.get(Unsafe.class);
311 | }
312 | catch (NoSuchFieldException | IllegalAccessException e) {
313 | throw new RuntimeException(e);
314 | }
315 | }
316 |
317 | long pos, end;
318 |
319 | public Scanner(long start, long end) {
320 | this.pos = start;
321 | this.end = end;
322 | }
323 |
324 | boolean hasNext() {
325 | return pos < end;
326 | }
327 |
328 | long pos() {
329 | return pos;
330 | }
331 |
332 | void add(int delta) {
333 | pos += delta;
334 | }
335 |
336 | long getLong() {
337 | return UNSAFE.getLong(pos);
338 | }
339 |
340 | long getLongAt(long pos) {
341 | return UNSAFE.getLong(pos);
342 | }
343 |
344 | public String getString(int nameLength) {
345 | byte[] bytes = new byte[nameLength];
346 | UNSAFE.copyMemory(null, pos, bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, nameLength);
347 | return new String(bytes, StandardCharsets.UTF_8);
348 | }
349 | }
350 | }
351 |
--------------------------------------------------------------------------------
/src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v6.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2023 The original authors
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package dev.morling.onebrc;
17 |
18 | import java.io.IOException;
19 | import java.lang.foreign.Arena;
20 | import java.lang.reflect.Field;
21 | import java.nio.channels.FileChannel;
22 | import java.nio.channels.FileChannel.MapMode;
23 | import java.nio.charset.StandardCharsets;
24 | import java.nio.file.Path;
25 | import java.nio.file.StandardOpenOption;
26 | import java.util.ArrayList;
27 | import java.util.List;
28 | import java.util.TreeMap;
29 | import java.util.stream.IntStream;
30 |
31 | import sun.misc.Unsafe;
32 |
33 | /**
34 | * Simple solution that memory maps the input file, then splits it into one segment per available core and uses
35 | * sun.misc.Unsafe to directly access the mapped memory. Uses a long at a time when checking for collision.
36 | *
37 | * Runs in 0.60s on my Intel i9-13900K
38 | * Perf stats:
39 | * 34,716,719,245 cpu_core/cycles/
40 | * 40,776,530,892 cpu_atom/cycles/
41 | */
42 | public class CalculateAverage_thomaswue_v6 {
43 | private static final String FILE = "./measurements.txt";
44 |
45 | // Holding the current result for a single city.
46 | private static class Result {
47 | long lastNameLong, secondLastNameLong, nameAddress;
48 | int nameLength, remainingShift;
49 | int min, max, count;
50 | long sum;
51 |
52 | private Result(long nameAddress) {
53 | this.nameAddress = nameAddress;
54 | this.min = Integer.MAX_VALUE;
55 | this.max = Integer.MIN_VALUE;
56 | }
57 |
58 | public String toString() {
59 | return round(((double) min) / 10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0);
60 | }
61 |
62 | private static double round(double value) {
63 | return Math.round(value * 10.0) / 10.0;
64 | }
65 |
66 | // Accumulate another result into this one.
67 | private void add(Result other) {
68 | min = Math.min(min, other.min);
69 | max = Math.max(max, other.max);
70 | sum += other.sum;
71 | count += other.count;
72 | }
73 |
74 | public String calcName() {
75 | return new Scanner(nameAddress, nameAddress + nameLength).getString(nameLength);
76 | }
77 | }
78 |
79 | public static void main(String[] args) throws IOException {
80 | // Calculate input segments.
81 | int numberOfChunks = Runtime.getRuntime().availableProcessors();
82 | long[] chunks = getSegments(numberOfChunks);
83 |
84 | // Parallel processing of segments.
85 | List> allResults = IntStream.range(0, chunks.length - 1).mapToObj(chunkIndex -> parseLoop(chunks[chunkIndex], chunks[chunkIndex + 1]))
86 | .map(resultArray -> {
87 | List results = new ArrayList<>();
88 | for (Result r : resultArray) {
89 | if (r != null) {
90 | results.add(r);
91 | }
92 | }
93 | return results;
94 | }).parallel().toList();
95 |
96 | // Final output.
97 | System.out.println(accumulateResults(allResults));
98 | }
99 |
100 | // Accumulate results sequentially for simplicity.
101 | private static TreeMap accumulateResults(List> allResults) {
102 | TreeMap result = new TreeMap<>();
103 | for (List resultArr : allResults) {
104 | for (Result r : resultArr) {
105 | String name = r.calcName();
106 | Result current = result.putIfAbsent(name, r);
107 | if (current != null) {
108 | current.add(r);
109 | }
110 | }
111 | }
112 | return result;
113 | }
114 |
115 | // Main parse loop.
116 | private static Result[] parseLoop(long chunkStart, long chunkEnd) {
117 | Result[] results = new Result[1 << 17];
118 | Scanner scanner = new Scanner(chunkStart, chunkEnd);
119 | long word = scanner.getLong();
120 | int pos = findDelimiter(word);
121 | while (scanner.hasNext()) {
122 | long nameAddress = scanner.pos();
123 | long hash = 0;
124 |
125 | // Search for ';', one long at a time.
126 | if (pos != 8) {
127 | scanner.add(pos);
128 | word = mask(word, pos);
129 | hash = word;
130 |
131 | int number = scanNumber(scanner);
132 | long nextWord = scanner.getLong();
133 | int nextPos = findDelimiter(nextWord);
134 |
135 | Result existingResult = results[hashToIndex(hash, results)];
136 | if (existingResult != null && existingResult.lastNameLong == word) {
137 | word = nextWord;
138 | pos = nextPos;
139 | record(existingResult, number);
140 | continue;
141 | }
142 |
143 | scanner.setPos(nameAddress + pos);
144 | }
145 | else {
146 | scanner.add(8);
147 | hash ^= word;
148 | long prevWord = word;
149 | word = scanner.getLong();
150 | pos = findDelimiter(word);
151 | if (pos != 8) {
152 | scanner.add(pos);
153 | word = mask(word, pos);
154 | hash ^= word;
155 |
156 | Result existingResult = results[hashToIndex(hash, results)];
157 | if (existingResult != null && existingResult.lastNameLong == word && existingResult.secondLastNameLong == prevWord) {
158 | int number = scanNumber(scanner);
159 | word = scanner.getLong();
160 | pos = findDelimiter(word);
161 | record(existingResult, number);
162 | continue;
163 | }
164 | }
165 | else {
166 | scanner.add(8);
167 | hash ^= word;
168 | while (true) {
169 | word = scanner.getLong();
170 | pos = findDelimiter(word);
171 | if (pos != 8) {
172 | scanner.add(pos);
173 | word = mask(word, pos);
174 | hash ^= word;
175 | break;
176 | }
177 | else {
178 | scanner.add(8);
179 | hash ^= word;
180 | }
181 | }
182 | }
183 | }
184 |
185 | // Save length of name for later.
186 | int nameLength = (int) (scanner.pos() - nameAddress);
187 | scanner.add(1);
188 |
189 | long numberWord = scanner.getLong();
190 | int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000);
191 | int number = convertIntoNumber(decimalSepPos, numberWord);
192 | scanner.add((decimalSepPos >>> 3) + 3);
193 |
194 | // Final calculation for index into hash table.
195 | int tableIndex = hashToIndex(hash, results);
196 | outer: while (true) {
197 | Result existingResult = results[tableIndex];
198 | if (existingResult == null) {
199 | existingResult = newEntry(results, nameAddress, tableIndex, nameLength, scanner);
200 | }
201 | // Check for collision.
202 | int i = 0;
203 | for (; i < nameLength + 1 - 8; i += 8) {
204 | if (scanner.getLongAt(existingResult.nameAddress + i) != scanner.getLongAt(nameAddress + i)) {
205 | tableIndex = (tableIndex + 31) & (results.length - 1);
206 | continue outer;
207 | }
208 | }
209 | if (((existingResult.lastNameLong ^ scanner.getLongAt(nameAddress + i)) << existingResult.remainingShift) == 0) {
210 | record(existingResult, number);
211 | break;
212 | }
213 | else {
214 | // Collision error, try next.
215 | tableIndex = (tableIndex + 31) & (results.length - 1);
216 | }
217 | }
218 |
219 | word = scanner.getLong();
220 | pos = findDelimiter(word);
221 | }
222 | return results;
223 | }
224 |
225 | private static int scanNumber(Scanner scanPtr) {
226 | scanPtr.add(1);
227 | long numberWord = scanPtr.getLong();
228 | int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000);
229 | int number = convertIntoNumber(decimalSepPos, numberWord);
230 | scanPtr.add((decimalSepPos >>> 3) + 3);
231 | return number;
232 | }
233 |
234 | private static void record(Result existingResult, int number) {
235 | existingResult.min = Math.min(existingResult.min, number);
236 | existingResult.max = Math.max(existingResult.max, number);
237 | existingResult.sum += number;
238 | existingResult.count++;
239 | }
240 |
241 | private static int hashToIndex(long hash, Result[] results) {
242 | int hashAsInt = (int) (hash ^ (hash >>> 28));
243 | int finalHash = (hashAsInt ^ (hashAsInt >>> 15));
244 | return (finalHash & (results.length - 1));
245 | }
246 |
247 | private static long mask(long word, int pos) {
248 | return word & (-1L >>> ((8 - pos - 1) << 3));
249 | }
250 |
251 | // Special method to convert a number in the specific format into an int value without branches created by
252 | // Quan Anh Mai.
253 | private static int convertIntoNumber(int decimalSepPos, long numberWord) {
254 | int shift = 28 - decimalSepPos;
255 | // signed is -1 if negative, 0 otherwise
256 | long signed = (~numberWord << 59) >> 63;
257 | long designMask = ~(signed & 0xFF);
258 | // Align the number to a specific position and transform the ascii code
259 | // to actual digit value in each byte
260 | long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L;
261 |
262 | // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit)
263 | // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) =
264 | // 0x000000UU00TTHH00 +
265 | // 0x00UU00TTHH000000 * 10 +
266 | // 0xUU00TTHH00000000 * 100
267 | // Now TT * 100 has 2 trailing zeroes and HH * 100 + TT * 10 + UU < 0x400
268 | // This results in our value lies in the bit 32 to 41 of this product
269 | // That was close :)
270 | long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF;
271 | long value = (absValue ^ signed) - signed;
272 | return (int) value;
273 | }
274 |
275 | private static int findDelimiter(long word) {
276 | long input = word ^ 0x3B3B3B3B3B3B3B3BL;
277 | long tmp = (input - 0x0101010101010101L) & ~input & 0x8080808080808080L;
278 | return Long.numberOfTrailingZeros(tmp) >>> 3;
279 | }
280 |
281 | private static Result newEntry(Result[] results, long nameAddress, int hash, int nameLength, Scanner scanner) {
282 | Result r = new Result(nameAddress);
283 | results[hash] = r;
284 |
285 | int i = 0;
286 | for (; i < nameLength + 1 - 8; i += 8) {
287 | r.secondLastNameLong = (scanner.getLongAt(nameAddress + i));
288 | }
289 | r.remainingShift = (64 - (nameLength + 1 - i) << 3);
290 | r.lastNameLong = (scanner.getLongAt(nameAddress + i) & (-1L >>> r.remainingShift));
291 | r.nameLength = nameLength;
292 | return r;
293 | }
294 |
295 | private static long[] getSegments(int numberOfChunks) throws IOException {
296 | try (var fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) {
297 | long fileSize = fileChannel.size();
298 | long segmentSize = (fileSize + numberOfChunks - 1) / numberOfChunks;
299 | long[] chunks = new long[numberOfChunks + 1];
300 | long mappedAddress = fileChannel.map(MapMode.READ_ONLY, 0, fileSize, Arena.global()).address();
301 | chunks[0] = mappedAddress;
302 | long endAddress = mappedAddress + fileSize;
303 | Scanner s = new Scanner(mappedAddress, mappedAddress + fileSize);
304 | for (int i = 1; i < numberOfChunks; ++i) {
305 | long chunkAddress = mappedAddress + i * segmentSize;
306 | // Align to first row start.
307 | while (chunkAddress < endAddress && (s.getLongAt(chunkAddress++) & 0xFF) != '\n') {
308 | // nop
309 | }
310 | chunks[i] = Math.min(chunkAddress, endAddress);
311 | }
312 | chunks[numberOfChunks] = endAddress;
313 | return chunks;
314 | }
315 | }
316 |
317 | private static class Scanner {
318 |
319 | private static final Unsafe UNSAFE = initUnsafe();
320 |
321 | private static Unsafe initUnsafe() {
322 | try {
323 | Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe");
324 | theUnsafe.setAccessible(true);
325 | return (Unsafe) theUnsafe.get(Unsafe.class);
326 | }
327 | catch (NoSuchFieldException | IllegalAccessException e) {
328 | throw new RuntimeException(e);
329 | }
330 | }
331 |
332 | long pos, end;
333 |
334 | public Scanner(long start, long end) {
335 | this.pos = start;
336 | this.end = end;
337 | }
338 |
339 | boolean hasNext() {
340 | return pos < end;
341 | }
342 |
343 | long pos() {
344 | return pos;
345 | }
346 |
347 | void add(int delta) {
348 | pos += delta;
349 | }
350 |
351 | long getLong() {
352 | return UNSAFE.getLong(pos);
353 | }
354 |
355 | long getLongAt(long pos) {
356 | return UNSAFE.getLong(pos);
357 | }
358 |
359 | public String getString(int nameLength) {
360 | byte[] bytes = new byte[nameLength];
361 | UNSAFE.copyMemory(null, pos, bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, nameLength);
362 | return new String(bytes, StandardCharsets.UTF_8);
363 | }
364 |
365 | public void setPos(long l) {
366 | this.pos = l;
367 | }
368 | }
369 | }
370 |
--------------------------------------------------------------------------------
/src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v7.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2023 The original authors
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package dev.morling.onebrc;
17 |
18 | import java.io.IOException;
19 | import java.nio.ByteBuffer;
20 | import java.nio.ByteOrder;
21 | import java.nio.channels.FileChannel;
22 | import java.nio.charset.StandardCharsets;
23 | import java.nio.file.Path;
24 | import java.nio.file.StandardOpenOption;
25 | import java.util.ArrayList;
26 | import java.util.Arrays;
27 | import java.util.List;
28 | import java.util.TreeMap;
29 | import java.util.stream.IntStream;
30 |
31 | /**
32 | * Simple solution that memory maps the input file, then splits it into one segment per available core and uses
33 | * sun.misc.Unsafe to directly access the mapped memory. Uses a long at a time when checking for collision.
34 | *
35 | * Runs in 0.60s on my Intel i9-13900K
36 | * Perf stats:
37 | * 34,716,719,245 cpu_core/cycles/
38 | * 40,776,530,892 cpu_atom/cycles/
39 | */
40 | public class CalculateAverage_thomaswue_v7 {
41 | private static final String FILE = "./measurements.txt";
42 | private static final int MIN_TEMP = -999;
43 | private static final int MAX_TEMP = 999;
44 |
45 | // Holding the current result for a single city.
46 | private static class Result {
47 | long lastNameLong, secondLastNameLong;
48 | long[] name;
49 | int count;
50 | short min, max;
51 | long sum;
52 |
53 | private Result() {
54 | this.min = MAX_TEMP;
55 | this.max = MIN_TEMP;
56 | }
57 |
58 | public String toString() {
59 | return round(((double) min) / 10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0);
60 | }
61 |
62 | private static double round(double value) {
63 | return Math.round(value * 10.0) / 10.0;
64 | }
65 |
66 | // Accumulate another result into this one.
67 | private void add(Result other) {
68 | if (other.min < min) {
69 | min = other.min;
70 | }
71 | if (other.max > max) {
72 | max = other.max;
73 | }
74 | sum += other.sum;
75 | count += other.count;
76 | }
77 |
78 | public String calcName() {
79 | ByteBuffer bb = ByteBuffer.allocate(name.length * Long.BYTES).order(ByteOrder.nativeOrder());
80 | bb.asLongBuffer().put(name);
81 | byte[] array = bb.array();
82 | int i = 0;
83 | while (array[i++] != ';')
84 | ;
85 | return new String(array, 0, i - 1, StandardCharsets.UTF_8);
86 | }
87 | }
88 |
89 | public static void main(String[] args) throws IOException {
90 | if (args.length == 0 || !("--worker".equals(args[0]))) {
91 | spawnWorker();
92 | return;
93 | }
94 | // Calculate input segments.
95 | int numberOfChunks = Runtime.getRuntime().availableProcessors();
96 | long[] chunks = getSegments(numberOfChunks);
97 |
98 | // Parallel processing of segments.
99 | List> allResults = IntStream.range(0, chunks.length - 1).mapToObj(chunkIndex -> parseLoop(chunks[chunkIndex], chunks[chunkIndex + 1]))
100 | .map(resultArray -> {
101 | List results = new ArrayList<>();
102 | for (Result r : resultArray) {
103 | if (r != null) {
104 | results.add(r);
105 | }
106 | }
107 | return results;
108 | }).parallel().toList();
109 |
110 | // Final output.
111 | System.out.println(accumulateResults(allResults));
112 | System.out.close();
113 | }
114 |
115 | private static void spawnWorker() throws IOException {
116 | ProcessHandle.Info info = ProcessHandle.current().info();
117 | ArrayList workerCommand = new ArrayList<>();
118 | info.command().ifPresent(workerCommand::add);
119 | info.arguments().ifPresent(args -> workerCommand.addAll(Arrays.asList(args)));
120 | workerCommand.add("--worker");
121 | new ProcessBuilder()
122 | .command(workerCommand)
123 | .inheritIO()
124 | .redirectOutput(ProcessBuilder.Redirect.PIPE)
125 | .start()
126 | .getInputStream()
127 | .transferTo(System.out);
128 | }
129 |
130 | // Accumulate results sequentially for simplicity.
131 | private static TreeMap accumulateResults(List> allResults) {
132 | TreeMap result = new TreeMap<>();
133 | for (List resultArr : allResults) {
134 | for (Result r : resultArr) {
135 | String name = r.calcName();
136 | Result current = result.putIfAbsent(name, r);
137 | if (current != null) {
138 | current.add(r);
139 | }
140 | }
141 | }
142 | return result;
143 | }
144 |
145 | // Main parse loop.
146 | private static Result[] parseLoop(long chunkStart, long chunkEnd) {
147 | Result[] results = new Result[1 << 17];
148 | Scanner scanner = new Scanner(chunkStart, chunkEnd);
149 | long word = scanner.getLong();
150 | long pos = findDelimiter(word);
151 | while (scanner.hasNext()) {
152 | long nameAddress = scanner.pos();
153 | long hash = 0;
154 |
155 | // Search for ';', one long at a time.
156 | if (pos != 0) {
157 | pos = Long.numberOfTrailingZeros(pos) >>> 3;
158 | scanner.add(pos);
159 | word = mask(word, pos);
160 | hash = word;
161 |
162 | int number = scanNumber(scanner);
163 | long nextWord = scanner.getLong();
164 | long nextPos = findDelimiter(nextWord);
165 |
166 | Result existingResult = results[hashToIndex(hash, results)];
167 | if (existingResult != null && existingResult.lastNameLong == word) {
168 | word = nextWord;
169 | pos = nextPos;
170 | record(existingResult, number);
171 | continue;
172 | }
173 |
174 | scanner.setPos(nameAddress + pos);
175 | }
176 | else {
177 | scanner.add(8);
178 | hash = word;
179 | long prevWord = word;
180 | word = scanner.getLong();
181 | pos = findDelimiter(word);
182 | if (pos != 0) {
183 | pos = Long.numberOfTrailingZeros(pos) >>> 3;
184 | scanner.add(pos);
185 | word = mask(word, pos);
186 | hash ^= word;
187 |
188 | Result existingResult = results[hashToIndex(hash, results)];
189 | if (existingResult != null && existingResult.lastNameLong == word && existingResult.secondLastNameLong == prevWord) {
190 | int number = scanNumber(scanner);
191 | word = scanner.getLong();
192 | pos = findDelimiter(word);
193 | record(existingResult, number);
194 | continue;
195 | }
196 | }
197 | else {
198 | scanner.add(8);
199 | hash ^= word;
200 | while (true) {
201 | word = scanner.getLong();
202 | pos = findDelimiter(word);
203 | if (pos != 0) {
204 | pos = Long.numberOfTrailingZeros(pos) >>> 3;
205 | scanner.add(pos);
206 | word = mask(word, pos);
207 | hash ^= word;
208 | break;
209 | }
210 | else {
211 | scanner.add(8);
212 | hash ^= word;
213 | }
214 | }
215 | }
216 | }
217 |
218 | // Save length of name for later.
219 | int nameLength = (int) (scanner.pos() - nameAddress);
220 | int number = scanNumber(scanner);
221 |
222 | // Final calculation for index into hash table.
223 | int tableIndex = hashToIndex(hash, results);
224 | outer: while (true) {
225 | Result existingResult = results[tableIndex];
226 | if (existingResult == null) {
227 | existingResult = newEntry(results, nameAddress, tableIndex, nameLength, scanner);
228 | }
229 | // Check for collision.
230 | int i = 0;
231 | int namePos = 0;
232 | for (; i < nameLength + 1 - 8; i += 8) {
233 | if (namePos >= existingResult.name.length || existingResult.name[namePos++] != scanner.getLongAt(nameAddress + i)) {
234 | tableIndex = (tableIndex + 31) & (results.length - 1);
235 | continue outer;
236 | }
237 | }
238 |
239 | int remainingShift = (64 - (nameLength + 1 - i) << 3);
240 | if (((existingResult.lastNameLong ^ (scanner.getLongAt(nameAddress + i) << remainingShift)) == 0)) {
241 | record(existingResult, number);
242 | break;
243 | }
244 | else {
245 | // Collision error, try next.
246 | tableIndex = (tableIndex + 31) & (results.length - 1);
247 | }
248 | }
249 |
250 | word = scanner.getLong();
251 | pos = findDelimiter(word);
252 | }
253 | return results;
254 | }
255 |
256 | private static int scanNumber(Scanner scanPtr) {
257 | scanPtr.add(1);
258 | long numberWord = scanPtr.getLong();
259 | int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000);
260 | int number = convertIntoNumber(decimalSepPos, numberWord);
261 | scanPtr.add((decimalSepPos >>> 3) + 3);
262 | return number;
263 | }
264 |
265 | private static void record(Result existingResult, int number) {
266 | if (number < existingResult.min) {
267 | existingResult.min = (short) number;
268 | }
269 | if (number > existingResult.max) {
270 | existingResult.max = (short) number;
271 | }
272 | existingResult.sum += number;
273 | existingResult.count++;
274 | }
275 |
276 | private static int hashToIndex(long hash, Result[] results) {
277 | int hashAsInt = (int) (hash ^ (hash >>> 28));
278 | int finalHash = (hashAsInt ^ (hashAsInt >>> 17));
279 | return (finalHash & (results.length - 1));
280 | }
281 |
282 | private static long mask(long word, long pos) {
283 | return (word << ((7 - pos) << 3));
284 | }
285 |
286 | // Special method to convert a number in the ascii number into an int without branches created by Quan Anh Mai.
287 | private static int convertIntoNumber(int decimalSepPos, long numberWord) {
288 | int shift = 28 - decimalSepPos;
289 | // signed is -1 if negative, 0 otherwise
290 | long signed = (~numberWord << 59) >> 63;
291 | long designMask = ~(signed & 0xFF);
292 | // Align the number to a specific position and transform the ascii to digit value
293 | long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L;
294 | // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit)
295 | // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) =
296 | // 0x000000UU00TTHH00 + 0x00UU00TTHH000000 * 10 + 0xUU00TTHH00000000 * 100
297 | long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF;
298 | long value = (absValue ^ signed) - signed;
299 | return (int) value;
300 | }
301 |
302 | private static long findDelimiter(long word) {
303 | long input = word ^ 0x3B3B3B3B3B3B3B3BL;
304 | long tmp = (input - 0x0101010101010101L) & ~input & 0x8080808080808080L;
305 | return tmp;
306 | }
307 |
308 | private static Result newEntry(Result[] results, long nameAddress, int hash, int nameLength, Scanner scanner) {
309 | Result r = new Result();
310 | results[hash] = r;
311 | long[] name = new long[(nameLength / Long.BYTES) + 1];
312 | int pos = 0;
313 | int i = 0;
314 | for (; i < nameLength + 1 - Long.BYTES; i += Long.BYTES) {
315 | name[pos++] = scanner.getLongAt(nameAddress + i);
316 | }
317 |
318 | if (pos > 0) {
319 | r.secondLastNameLong = name[pos - 1];
320 | }
321 |
322 | int remainingShift = (64 - (nameLength + 1 - i) << 3);
323 | long lastWord = (scanner.getLongAt(nameAddress + i) << remainingShift);
324 | r.lastNameLong = lastWord;
325 | name[pos] = lastWord >> remainingShift;
326 | r.name = name;
327 | return r;
328 | }
329 |
330 | private static long[] getSegments(int numberOfChunks) throws IOException {
331 | try (var fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) {
332 | long fileSize = fileChannel.size();
333 | long segmentSize = (fileSize + numberOfChunks - 1) / numberOfChunks;
334 | long[] chunks = new long[numberOfChunks + 1];
335 | long mappedAddress = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, java.lang.foreign.Arena.global()).address();
336 | chunks[0] = mappedAddress;
337 | long endAddress = mappedAddress + fileSize;
338 | Scanner s = new Scanner(mappedAddress, mappedAddress + fileSize);
339 | for (int i = 1; i < numberOfChunks; ++i) {
340 | long chunkAddress = mappedAddress + i * segmentSize;
341 | // Align to first row start.
342 | while (chunkAddress < endAddress && (s.getLongAt(chunkAddress++) & 0xFF) != '\n')
343 | ;
344 | chunks[i] = Math.min(chunkAddress, endAddress);
345 | }
346 | chunks[numberOfChunks] = endAddress;
347 | return chunks;
348 | }
349 | }
350 |
351 | private static class Scanner {
352 |
353 | private static final sun.misc.Unsafe UNSAFE = initUnsafe();
354 |
355 | private static sun.misc.Unsafe initUnsafe() {
356 | try {
357 | java.lang.reflect.Field theUnsafe = sun.misc.Unsafe.class.getDeclaredField("theUnsafe");
358 | theUnsafe.setAccessible(true);
359 | return (sun.misc.Unsafe) theUnsafe.get(sun.misc.Unsafe.class);
360 | }
361 | catch (NoSuchFieldException | IllegalAccessException e) {
362 | throw new RuntimeException(e);
363 | }
364 | }
365 |
366 | long pos, end;
367 |
368 | public Scanner(long start, long end) {
369 | this.pos = start;
370 | this.end = end;
371 | }
372 |
373 | boolean hasNext() {
374 | return pos < end;
375 | }
376 |
377 | long pos() {
378 | return pos;
379 | }
380 |
381 | void add(long delta) {
382 | pos += delta;
383 | }
384 |
385 | long getLong() {
386 | return UNSAFE.getLong(pos);
387 | }
388 |
389 | long getLongAt(long pos) {
390 | return UNSAFE.getLong(pos);
391 | }
392 |
393 | void setPos(long l) {
394 | this.pos = l;
395 | }
396 | }
397 | }
398 |
--------------------------------------------------------------------------------
/src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v8.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2023 The original authors
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package dev.morling.onebrc;
17 |
18 | import java.io.IOException;
19 | import java.nio.ByteBuffer;
20 | import java.nio.ByteOrder;
21 | import java.nio.channels.FileChannel;
22 | import java.nio.charset.StandardCharsets;
23 | import java.nio.file.Path;
24 | import java.nio.file.StandardOpenOption;
25 | import java.util.ArrayList;
26 | import java.util.Arrays;
27 | import java.util.List;
28 | import java.util.TreeMap;
29 | import java.util.concurrent.atomic.AtomicLong;
30 |
31 | /**
32 | * Simple solution that memory maps the input file, then splits it into one segment per available core and uses
33 | * sun.misc.Unsafe to directly access the mapped memory. Uses a long at a time when checking for collision.
34 | *
35 | * Runs in 0.41s on my Intel i9-13900K
36 | * Perf stats:
37 | * 25,286,227,376 cpu_core/cycles/
38 | * 26,833,723,225 cpu_atom/cycles/
39 | */
40 | public class CalculateAverage_thomaswue_v8 {
41 | private static final String FILE = "./measurements.txt";
42 | private static final int MIN_TEMP = -999;
43 | private static final int MAX_TEMP = 999;
44 |
45 | // Holding the current result for a single city.
46 | private static class Result {
47 | long lastNameLong, secondLastNameLong;
48 | long min, max;
49 | long sum;
50 | int count;
51 | long[] name;
52 | String nameAsString;
53 |
54 | private Result() {
55 | this.min = MAX_TEMP;
56 | this.max = MIN_TEMP;
57 | }
58 |
59 | public String toString() {
60 | return round(((double) min) / 10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0);
61 | }
62 |
63 | private static double round(double value) {
64 | return Math.round(value * 10.0) / 10.0;
65 | }
66 |
67 | // Accumulate another result into this one.
68 | private void add(Result other) {
69 | if (other.min < min) {
70 | min = other.min;
71 | }
72 | if (other.max > max) {
73 | max = other.max;
74 | }
75 | sum += other.sum;
76 | count += other.count;
77 | }
78 |
79 | public String calcName() {
80 | if (nameAsString == null) {
81 | ByteBuffer bb = ByteBuffer.allocate(name.length * Long.BYTES).order(ByteOrder.nativeOrder());
82 | bb.asLongBuffer().put(name);
83 | byte[] array = bb.array();
84 | int i = 0;
85 | while (array[i++] != ';')
86 | ;
87 | nameAsString = new String(array, 0, i - 1, StandardCharsets.UTF_8);
88 | }
89 | return nameAsString;
90 | }
91 | }
92 |
93 | public static void main(String[] args) throws IOException, InterruptedException {
94 | if (args.length == 0 || !("--worker".equals(args[0]))) {
95 | spawnWorker();
96 | return;
97 | }
98 | // Calculate input segments.
99 | int numberOfWorkers = Runtime.getRuntime().availableProcessors();
100 | final AtomicLong cursor = new AtomicLong();
101 | final long fileEnd;
102 | final long fileStart;
103 |
104 | try (var fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) {
105 | long fileSize = fileChannel.size();
106 | fileStart = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, java.lang.foreign.Arena.global()).address();
107 | cursor.set(fileStart);
108 | fileEnd = fileStart + fileSize;
109 | }
110 |
111 | // Parallel processing of segments.
112 | Thread[] threads = new Thread[numberOfWorkers];
113 | List[] allResults = new List[numberOfWorkers];
114 | for (int i = 0; i < threads.length; ++i) {
115 | final int index = i;
116 | threads[i] = new Thread(() -> {
117 | Result[] resultArray = parseLoop(cursor, fileEnd, fileStart);
118 | List results = new ArrayList<>(500);
119 | for (Result r : resultArray) {
120 | if (r != null) {
121 | r.calcName();
122 | results.add(r);
123 | }
124 | }
125 | allResults[index] = results;
126 | });
127 | threads[i].start();
128 | }
129 |
130 | for (Thread thread : threads) {
131 | thread.join();
132 | }
133 |
134 | // Final output.
135 | System.out.println(accumulateResults(allResults));
136 | System.out.close();
137 | }
138 |
139 | private static void spawnWorker() throws IOException {
140 | ProcessHandle.Info info = ProcessHandle.current().info();
141 | ArrayList workerCommand = new ArrayList<>();
142 | info.command().ifPresent(workerCommand::add);
143 | info.arguments().ifPresent(args -> workerCommand.addAll(Arrays.asList(args)));
144 | workerCommand.add("--worker");
145 | new ProcessBuilder().command(workerCommand).inheritIO().redirectOutput(ProcessBuilder.Redirect.PIPE)
146 | .start().getInputStream().transferTo(System.out);
147 | }
148 |
149 | // Accumulate results sequentially for simplicity.
150 | private static TreeMap accumulateResults(List[] allResults) {
151 | TreeMap result = new TreeMap<>();
152 | for (List resultArr : allResults) {
153 | for (Result r : resultArr) {
154 | String name = r.calcName();
155 | Result current = result.putIfAbsent(name, r);
156 | if (current != null) {
157 | current.add(r);
158 | }
159 | }
160 | }
161 | return result;
162 | }
163 |
164 | private static Result findResult(long initialWord, long initialPos, Scanner scanner, Result[] results) {
165 |
166 | Result existingResult;
167 | long word = initialWord;
168 | long pos = initialPos;
169 | long hash;
170 | long nameAddress = scanner.pos();
171 |
172 | // Search for ';', one long at a time.
173 | if (pos != 0) {
174 | pos = Long.numberOfTrailingZeros(pos) >>> 3;
175 | scanner.add(pos);
176 | word = mask(word, pos);
177 | hash = word;
178 |
179 | int index = hashToIndex(hash, results);
180 | existingResult = results[index];
181 |
182 | if (existingResult != null && existingResult.lastNameLong == word) {
183 | return existingResult;
184 | }
185 | else {
186 | scanner.setPos(nameAddress + pos);
187 | }
188 | }
189 | else {
190 | scanner.add(8);
191 | hash = word;
192 | long prevWord = word;
193 | word = scanner.getLong();
194 | pos = findDelimiter(word);
195 | if (pos != 0) {
196 | pos = Long.numberOfTrailingZeros(pos) >>> 3;
197 | scanner.add(pos);
198 | word = mask(word, pos);
199 | hash ^= word;
200 | int index = hashToIndex(hash, results);
201 | existingResult = results[index];
202 |
203 | if (existingResult != null && existingResult.lastNameLong == word && existingResult.secondLastNameLong == prevWord) {
204 | return existingResult;
205 | }
206 | else {
207 | scanner.setPos(nameAddress + pos + 8);
208 | }
209 | }
210 | else {
211 | scanner.add(8);
212 | hash ^= word;
213 | while (true) {
214 | word = scanner.getLong();
215 | pos = findDelimiter(word);
216 | if (pos != 0) {
217 | pos = Long.numberOfTrailingZeros(pos) >>> 3;
218 | scanner.add(pos);
219 | word = mask(word, pos);
220 | hash ^= word;
221 | break;
222 | }
223 | else {
224 | scanner.add(8);
225 | hash ^= word;
226 | }
227 | }
228 | }
229 | }
230 |
231 | // Save length of name for later.
232 | int nameLength = (int) (scanner.pos() - nameAddress);
233 |
234 | // Final calculation for index into hash table.
235 | int tableIndex = hashToIndex(hash, results);
236 | outer: while (true) {
237 | existingResult = results[tableIndex];
238 | if (existingResult == null) {
239 | existingResult = newEntry(results, nameAddress, tableIndex, nameLength, scanner);
240 | }
241 | // Check for collision.
242 | int i = 0;
243 | long[] name = existingResult.name;
244 | for (; i < nameLength + 1 - 8; i += 8) {
245 | if (scanner.getLongAt(i, name) != scanner.getLongAt(nameAddress + i)) {
246 | tableIndex = (tableIndex + 31) & (results.length - 1);
247 | continue outer;
248 | }
249 | }
250 |
251 | int remainingShift = (64 - (nameLength + 1 - i) << 3);
252 | if (((existingResult.lastNameLong ^ (scanner.getLongAt(nameAddress + i) << remainingShift)) == 0)) {
253 | break;
254 | }
255 | else {
256 | // Collision error, try next.
257 | tableIndex = (tableIndex + 31) & (results.length - 1);
258 | }
259 | }
260 | return existingResult;
261 | }
262 |
263 | private static long nextNL(long prev) {
264 | while (true) {
265 | long currentWord = Scanner.UNSAFE.getLong(prev);
266 | long pos = findNewLine(currentWord);
267 | if (pos != 0) {
268 | prev += Long.numberOfTrailingZeros(pos) >>> 3;
269 | break;
270 | }
271 | else {
272 | prev += 8;
273 | }
274 | }
275 | return prev;
276 | }
277 |
278 | private static final int SEGMENT_SIZE = 1024 * 1024 * 2;
279 |
280 | // Main parse loop.
281 | private static Result[] parseLoop(AtomicLong counter, long fileEnd, long fileStart) {
282 | Result[] results = new Result[1 << 17];
283 |
284 | while (true) {
285 | long current = counter.addAndGet(SEGMENT_SIZE) - SEGMENT_SIZE;
286 |
287 | if (current >= fileEnd) {
288 | return results;
289 | }
290 |
291 | long segmentEnd = nextNL(Math.min(fileEnd - 1, current + SEGMENT_SIZE));
292 | long segmentStart;
293 | if (current == fileStart) {
294 | segmentStart = current;
295 | }
296 | else {
297 | segmentStart = nextNL(current) + 1;
298 | }
299 |
300 | long dist = (segmentEnd - segmentStart) / 3;
301 | long midPoint1 = nextNL(segmentStart + dist);
302 | long midPoint2 = nextNL(segmentStart + dist + dist);
303 |
304 | Scanner scanner1 = new Scanner(segmentStart, midPoint1);
305 | Scanner scanner2 = new Scanner(midPoint1 + 1, midPoint2);
306 | Scanner scanner3 = new Scanner(midPoint2 + 1, segmentEnd);
307 | while (true) {
308 | if (!scanner1.hasNext()) {
309 | break;
310 | }
311 | if (!scanner2.hasNext()) {
312 | break;
313 | }
314 | if (!scanner3.hasNext()) {
315 | break;
316 | }
317 |
318 | long word1 = scanner1.getLong();
319 | long word2 = scanner2.getLong();
320 | long word3 = scanner3.getLong();
321 | long pos1 = findDelimiter(word1);
322 | long pos2 = findDelimiter(word2);
323 | long pos3 = findDelimiter(word3);
324 | Result existingResult1 = findResult(word1, pos1, scanner1, results);
325 | Result existingResult2 = findResult(word2, pos2, scanner2, results);
326 | Result existingResult3 = findResult(word3, pos3, scanner3, results);
327 | long number1 = scanNumber(scanner1);
328 | long number2 = scanNumber(scanner2);
329 | long number3 = scanNumber(scanner3);
330 | record(existingResult1, number1);
331 | record(existingResult2, number2);
332 | record(existingResult3, number3);
333 | }
334 |
335 | while (scanner1.hasNext()) {
336 | long word = scanner1.getLong();
337 | long pos = findDelimiter(word);
338 | record(findResult(word, pos, scanner1, results), scanNumber(scanner1));
339 | }
340 |
341 | while (scanner2.hasNext()) {
342 | long word = scanner2.getLong();
343 | long pos = findDelimiter(word);
344 | record(findResult(word, pos, scanner2, results), scanNumber(scanner2));
345 | }
346 |
347 | while (scanner3.hasNext()) {
348 | long word = scanner3.getLong();
349 | long pos = findDelimiter(word);
350 | record(findResult(word, pos, scanner3, results), scanNumber(scanner3));
351 | }
352 | }
353 | }
354 |
355 | private static long scanNumber(Scanner scanPtr) {
356 | scanPtr.add(1);
357 | long numberWord = scanPtr.getLong();
358 | int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000);
359 | long number = convertIntoNumber(decimalSepPos, numberWord);
360 | scanPtr.add((decimalSepPos >>> 3) + 3);
361 | return number;
362 | }
363 |
364 | private static void record(Result existingResult, long number) {
365 | if (number < existingResult.min) {
366 | existingResult.min = number;
367 | }
368 | if (number > existingResult.max) {
369 | existingResult.max = number;
370 | }
371 | existingResult.sum += number;
372 | existingResult.count++;
373 | }
374 |
375 | private static int hashToIndex(long hash, Result[] results) {
376 | long hashAsInt = hash ^ (hash >>> 37) ^ (hash >>> 17);
377 | return (int) (hashAsInt & (results.length - 1));
378 | }
379 |
380 | private static long mask(long word, long pos) {
381 | return (word << ((7 - pos) << 3));
382 | }
383 |
384 | // Special method to convert a number in the ascii number into an int without branches created by Quan Anh Mai.
385 | private static long convertIntoNumber(int decimalSepPos, long numberWord) {
386 | int shift = 28 - decimalSepPos;
387 | // signed is -1 if negative, 0 otherwise
388 | long signed = (~numberWord << 59) >> 63;
389 | long designMask = ~(signed & 0xFF);
390 | // Align the number to a specific position and transform the ascii to digit value
391 | long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L;
392 | // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit)
393 | // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) =
394 | // 0x000000UU00TTHH00 + 0x00UU00TTHH000000 * 10 + 0xUU00TTHH00000000 * 100
395 | long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF;
396 | return (absValue ^ signed) - signed;
397 | }
398 |
399 | private static long findDelimiter(long word) {
400 | long input = word ^ 0x3B3B3B3B3B3B3B3BL;
401 | long tmp = (input - 0x0101010101010101L) & ~input & 0x8080808080808080L;
402 | return tmp;
403 | }
404 |
405 | private static long findNewLine(long word) {
406 | long input = word ^ 0x0A0A0A0A0A0A0A0AL;
407 | long tmp = (input - 0x0101010101010101L) & ~input & 0x8080808080808080L;
408 | return tmp;
409 | }
410 |
411 | private static Result newEntry(Result[] results, long nameAddress, int hash, int nameLength, Scanner scanner) {
412 | Result r = new Result();
413 | results[hash] = r;
414 | long[] name = new long[(nameLength / Long.BYTES) + 1];
415 | int pos = 0;
416 | int i = 0;
417 | for (; i < nameLength + 1 - Long.BYTES; i += Long.BYTES) {
418 | name[pos++] = scanner.getLongAt(nameAddress + i);
419 | }
420 |
421 | if (pos > 0) {
422 | r.secondLastNameLong = name[pos - 1];
423 | }
424 |
425 | int remainingShift = (64 - (nameLength + 1 - i) << 3);
426 | long lastWord = (scanner.getLongAt(nameAddress + i) << remainingShift);
427 | r.lastNameLong = lastWord;
428 | name[pos] = lastWord >> remainingShift;
429 | r.name = name;
430 | return r;
431 | }
432 |
433 | private static class Scanner {
434 |
435 | private static final sun.misc.Unsafe UNSAFE = initUnsafe();
436 |
437 | private static sun.misc.Unsafe initUnsafe() {
438 | try {
439 | java.lang.reflect.Field theUnsafe = sun.misc.Unsafe.class.getDeclaredField("theUnsafe");
440 | theUnsafe.setAccessible(true);
441 | return (sun.misc.Unsafe) theUnsafe.get(sun.misc.Unsafe.class);
442 | }
443 | catch (NoSuchFieldException | IllegalAccessException e) {
444 | throw new RuntimeException(e);
445 | }
446 | }
447 |
448 | long pos, end;
449 |
450 | public Scanner(long start, long end) {
451 | this.pos = start;
452 | this.end = end;
453 | }
454 |
455 | boolean hasNext() {
456 | return pos < end;
457 | }
458 |
459 | long pos() {
460 | return pos;
461 | }
462 |
463 | void add(long delta) {
464 | pos += delta;
465 | }
466 |
467 | long getLong() {
468 | return UNSAFE.getLong(pos);
469 | }
470 |
471 | long getLongAt(long pos) {
472 | return UNSAFE.getLong(pos);
473 | }
474 |
475 | long getLongAt(long pos, long[] array) {
476 | return UNSAFE.getLong(array, pos + sun.misc.Unsafe.ARRAY_LONG_BASE_OFFSET);
477 | }
478 |
479 | void setPos(long l) {
480 | this.pos = l;
481 | }
482 | }
483 | }
484 |
--------------------------------------------------------------------------------
/src/main/java/dev/morling/onebrc/CalculateAverage_thomaswue_v9.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2023 The original authors
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package dev.morling.onebrc;
17 |
18 | import java.io.IOException;
19 | import java.nio.channels.FileChannel;
20 | import java.util.*;
21 | import java.util.concurrent.atomic.AtomicLong;
22 |
23 | /**
24 | * The solution starts a child worker process for the actual work such that clean up of the memory mapping can occur
25 | * while the main process already returns with the result. The worker then memory maps the input file, creates a worker
26 | * thread per available core, and then processes segments of size {@link #SEGMENT_SIZE} at a time. The segments are
27 | * split into 3 parts and cursors for each of those parts are processing the segment simultaneously in the same thread.
28 | * Results are accumulated into {@link Result} objects and a tree map is used to sequentially accumulate the results in
29 | * the end.
30 | * Runs in 0.39s on an Intel i9-13900K.
31 | * Credit:
32 | * Quan Anh Mai for branchless number parsing code
33 | * Alfonso² Peterssen for suggesting memory mapping with unsafe and the subprocess idea
34 | * Artsiom Korzun for showing the benefits of work stealing at 2MB segments instead of equal split between workers
35 | */
36 | public class CalculateAverage_thomaswue_v9 {
37 | private static final String FILE = "./measurements.txt";
38 | private static final int MIN_TEMP = -999;
39 | private static final int MAX_TEMP = 999;
40 | private static final int MAX_NAME_LENGTH = 100;
41 | private static final int MAX_CITIES = 10000;
42 | private static final int SEGMENT_SIZE = 1 << 21;
43 | private static final int HASH_TABLE_SIZE = 1 << 17;
44 |
45 | public static void main(String[] args) throws IOException, InterruptedException {
46 | // Start worker subprocess if this process is not the worker.
47 | if (args.length == 0 || !("--worker".equals(args[0]))) {
48 | spawnWorker();
49 | return;
50 | }
51 |
52 | int numberOfWorkers = Runtime.getRuntime().availableProcessors();
53 | try (var fileChannel = FileChannel.open(java.nio.file.Path.of(FILE), java.nio.file.StandardOpenOption.READ)) {
54 | long fileSize = fileChannel.size();
55 | final long fileStart = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, java.lang.foreign.Arena.global()).address();
56 | final long fileEnd = fileStart + fileSize;
57 | final AtomicLong cursor = new AtomicLong(fileStart);
58 |
59 | // Parallel processing of segments.
60 | Thread[] threads = new Thread[numberOfWorkers];
61 | List[] allResults = new List[numberOfWorkers];
62 | for (int i = 0; i < threads.length; ++i) {
63 | final int index = i;
64 | threads[i] = new Thread(() -> {
65 | List results = new ArrayList<>(MAX_CITIES);
66 | parseLoop(cursor, fileEnd, fileStart, results);
67 | allResults[index] = results;
68 | });
69 | threads[i].start();
70 | }
71 | for (Thread thread : threads) {
72 | thread.join();
73 | }
74 |
75 | // Final output.
76 | System.out.println(accumulateResults(allResults));
77 | System.out.close();
78 | }
79 | }
80 |
81 | private static void spawnWorker() throws IOException {
82 | ProcessHandle.Info info = ProcessHandle.current().info();
83 | ArrayList workerCommand = new ArrayList<>();
84 | info.command().ifPresent(workerCommand::add);
85 | info.arguments().ifPresent(args -> workerCommand.addAll(Arrays.asList(args)));
86 | workerCommand.add("--worker");
87 | new ProcessBuilder().command(workerCommand).inheritIO().redirectOutput(ProcessBuilder.Redirect.PIPE)
88 | .start().getInputStream().transferTo(System.out);
89 | }
90 |
91 | private static TreeMap accumulateResults(List[] allResults) {
92 | TreeMap result = new TreeMap<>();
93 | for (List resultArr : allResults) {
94 | for (Result r : resultArr) {
95 | Result current = result.putIfAbsent(r.calcName(), r);
96 | if (current != null) {
97 | current.accumulate(r);
98 | }
99 | }
100 | }
101 | return result;
102 | }
103 |
104 | private static void parseLoop(AtomicLong counter, long fileEnd, long fileStart, List collectedResults) {
105 | Result[] results = new Result[HASH_TABLE_SIZE];
106 | while (true) {
107 | long current = counter.addAndGet(SEGMENT_SIZE) - SEGMENT_SIZE;
108 | if (current >= fileEnd) {
109 | return;
110 | }
111 |
112 | long segmentEnd = nextNewLine(Math.min(fileEnd - 1, current + SEGMENT_SIZE));
113 | long segmentStart;
114 | if (current == fileStart) {
115 | segmentStart = current;
116 | }
117 | else {
118 | segmentStart = nextNewLine(current) + 1;
119 | }
120 |
121 | long dist = (segmentEnd - segmentStart) / 3;
122 | long midPoint1 = nextNewLine(segmentStart + dist);
123 | long midPoint2 = nextNewLine(segmentStart + dist + dist);
124 |
125 | Scanner scanner1 = new Scanner(segmentStart, midPoint1);
126 | Scanner scanner2 = new Scanner(midPoint1 + 1, midPoint2);
127 | Scanner scanner3 = new Scanner(midPoint2 + 1, segmentEnd);
128 | while (true) {
129 | if (!scanner1.hasNext()) {
130 | break;
131 | }
132 | if (!scanner2.hasNext()) {
133 | break;
134 | }
135 | if (!scanner3.hasNext()) {
136 | break;
137 | }
138 | long word1 = scanner1.getLong();
139 | long word2 = scanner2.getLong();
140 | long word3 = scanner3.getLong();
141 | long delimiterMask1 = findDelimiter(word1);
142 | long delimiterMask2 = findDelimiter(word2);
143 | long delimiterMask3 = findDelimiter(word3);
144 | Result existingResult1 = findResult(word1, delimiterMask1, scanner1, results, collectedResults);
145 | Result existingResult2 = findResult(word2, delimiterMask2, scanner2, results, collectedResults);
146 | Result existingResult3 = findResult(word3, delimiterMask3, scanner3, results, collectedResults);
147 | long number1 = scanNumber(scanner1);
148 | long number2 = scanNumber(scanner2);
149 | long number3 = scanNumber(scanner3);
150 | record(existingResult1, number1);
151 | record(existingResult2, number2);
152 | record(existingResult3, number3);
153 | }
154 |
155 | while (scanner1.hasNext()) {
156 | long word = scanner1.getLong();
157 | long pos = findDelimiter(word);
158 | record(findResult(word, pos, scanner1, results, collectedResults), scanNumber(scanner1));
159 | }
160 | while (scanner2.hasNext()) {
161 | long word = scanner2.getLong();
162 | long pos = findDelimiter(word);
163 | record(findResult(word, pos, scanner2, results, collectedResults), scanNumber(scanner2));
164 | }
165 | while (scanner3.hasNext()) {
166 | long word = scanner3.getLong();
167 | long pos = findDelimiter(word);
168 | record(findResult(word, pos, scanner3, results, collectedResults), scanNumber(scanner3));
169 | }
170 | }
171 | }
172 |
173 | private static Result findResult(long initialWord, long initialDelimiterMask, Scanner scanner, Result[] results, List collectedResults) {
174 | Result existingResult;
175 | long word = initialWord;
176 | long delimiterMask = initialDelimiterMask;
177 | long hash;
178 | long nameAddress = scanner.pos();
179 |
180 | // Search for ';', one long at a time. There are two common cases that a specially treated:
181 | // (b) the ';' is found in the first 16 bytes
182 | if (delimiterMask != 0) {
183 | // Special case for when the ';' is found in the first 8 bytes.
184 | int trailingZeros = Long.numberOfTrailingZeros(delimiterMask);
185 | word = (word << (63 - trailingZeros));
186 | scanner.add(trailingZeros >>> 3);
187 | hash = word;
188 | existingResult = results[hashToIndex(hash, results)];
189 | if (existingResult != null && existingResult.lastNameLong == word) {
190 | return existingResult;
191 | }
192 | }
193 | else {
194 | // Special case for when the ';' is found in bytes 9-16.
195 | hash = word;
196 | long prevWord = word;
197 | scanner.add(8);
198 | word = scanner.getLong();
199 | delimiterMask = findDelimiter(word);
200 | if (delimiterMask != 0) {
201 | int trailingZeros = Long.numberOfTrailingZeros(delimiterMask);
202 | word = (word << (63 - trailingZeros));
203 | scanner.add(trailingZeros >>> 3);
204 | hash ^= word;
205 | existingResult = results[hashToIndex(hash, results)];
206 | if (existingResult != null && existingResult.lastNameLong == word && existingResult.secondLastNameLong == prevWord) {
207 | return existingResult;
208 | }
209 | }
210 | else {
211 | // Slow-path for when the ';' could not be found in the first 16 bytes.
212 | scanner.add(8);
213 | hash ^= word;
214 | while (true) {
215 | word = scanner.getLong();
216 | delimiterMask = findDelimiter(word);
217 | if (delimiterMask != 0) {
218 | int trailingZeros = Long.numberOfTrailingZeros(delimiterMask);
219 | word = (word << (63 - trailingZeros));
220 | scanner.add(trailingZeros >>> 3);
221 | hash ^= word;
222 | break;
223 | }
224 | else {
225 | scanner.add(8);
226 | hash ^= word;
227 | }
228 | }
229 | }
230 | }
231 |
232 | // Save length of name for later.
233 | int nameLength = (int) (scanner.pos() - nameAddress);
234 |
235 | // Final calculation for index into hash table.
236 | int tableIndex = hashToIndex(hash, results);
237 | outer: while (true) {
238 | existingResult = results[tableIndex];
239 | if (existingResult == null) {
240 | existingResult = newEntry(results, nameAddress, tableIndex, nameLength, scanner, collectedResults);
241 | }
242 | // Check for collision.
243 | int i = 0;
244 | for (; i < nameLength + 1 - 8; i += 8) {
245 | if (scanner.getLongAt(existingResult.nameAddress + i) != scanner.getLongAt(nameAddress + i)) {
246 | // Collision error, try next.
247 | tableIndex = (tableIndex + 31) & (results.length - 1);
248 | continue outer;
249 | }
250 | }
251 |
252 | int remainingShift = (64 - (nameLength + 1 - i) << 3);
253 | if (existingResult.lastNameLong == (scanner.getLongAt(nameAddress + i) << remainingShift)) {
254 | break;
255 | }
256 | else {
257 | // Collision error, try next.
258 | tableIndex = (tableIndex + 31) & (results.length - 1);
259 | }
260 | }
261 | return existingResult;
262 | }
263 |
264 | private static long nextNewLine(long prev) {
265 | while (true) {
266 | long currentWord = Scanner.UNSAFE.getLong(prev);
267 | long input = currentWord ^ 0x0A0A0A0A0A0A0A0AL;
268 | long pos = (input - 0x0101010101010101L) & ~input & 0x8080808080808080L;
269 | if (pos != 0) {
270 | prev += Long.numberOfTrailingZeros(pos) >>> 3;
271 | break;
272 | }
273 | else {
274 | prev += 8;
275 | }
276 | }
277 | return prev;
278 | }
279 |
280 | private static long scanNumber(Scanner scanPtr) {
281 | long numberWord = scanPtr.getLongAt(scanPtr.pos() + 1);
282 | int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000L);
283 | long number = convertIntoNumber(decimalSepPos, numberWord);
284 | scanPtr.add((decimalSepPos >>> 3) + 4);
285 | return number;
286 | }
287 |
288 | private static void record(Result existingResult, long number) {
289 | if (number < existingResult.min) {
290 | existingResult.min = (short) number;
291 | }
292 | if (number > existingResult.max) {
293 | existingResult.max = (short) number;
294 | }
295 | existingResult.sum += number;
296 | existingResult.count++;
297 | }
298 |
299 | private static int hashToIndex(long hash, Result[] results) {
300 | long hashAsInt = hash ^ (hash >>> 37) ^ (hash >>> 17);
301 | return (int) (hashAsInt & (results.length - 1));
302 | }
303 |
304 | // Special method to convert a number in the ascii number into an int without branches created by Quan Anh Mai.
305 | private static long convertIntoNumber(int decimalSepPos, long numberWord) {
306 | int shift = 28 - decimalSepPos;
307 | // signed is -1 if negative, 0 otherwise
308 | long signed = (~numberWord << 59) >> 63;
309 | long designMask = ~(signed & 0xFF);
310 | // Align the number to a specific position and transform the ascii to digit value
311 | long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L;
312 | // Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit)
313 | // 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) =
314 | // 0x000000UU00TTHH00 + 0x00UU00TTHH000000 * 10 + 0xUU00TTHH00000000 * 100
315 | long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF;
316 | return (absValue ^ signed) - signed;
317 | }
318 |
319 | private static long findDelimiter(long word) {
320 | long input = word ^ 0x3B3B3B3B3B3B3B3BL;
321 | return (input - 0x0101010101010101L) & ~input & 0x8080808080808080L;
322 | }
323 |
324 | private static Result newEntry(Result[] results, long nameAddress, int hash, int nameLength, Scanner scanner, List collectedResults) {
325 | Result r = new Result();
326 | results[hash] = r;
327 | int i = 0;
328 | for (; i < nameLength + 1 - Long.BYTES; i += Long.BYTES) {
329 | }
330 | if (nameLength + 1 > 8) {
331 | r.secondLastNameLong = scanner.getLongAt(nameAddress + i - 8);
332 | }
333 | int remainingShift = (64 - (nameLength + 1 - i) << 3);
334 | r.lastNameLong = (scanner.getLongAt(nameAddress + i) << remainingShift);
335 | r.nameAddress = nameAddress;
336 | collectedResults.add(r);
337 | return r;
338 | }
339 |
340 | private static final class Result {
341 | long lastNameLong, secondLastNameLong;
342 | short min, max;
343 | int count;
344 | long sum;
345 | long nameAddress;
346 |
347 | private Result() {
348 | this.min = MAX_TEMP;
349 | this.max = MIN_TEMP;
350 | }
351 |
352 | public String toString() {
353 | return round(((double) min) / 10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0);
354 | }
355 |
356 | private static double round(double value) {
357 | return Math.round(value * 10.0) / 10.0;
358 | }
359 |
360 | private void accumulate(Result other) {
361 | if (other.min < min) {
362 | min = other.min;
363 | }
364 | if (other.max > max) {
365 | max = other.max;
366 | }
367 | sum += other.sum;
368 | count += other.count;
369 | }
370 |
371 | public String calcName() {
372 | Scanner scanner = new Scanner(nameAddress, nameAddress + MAX_NAME_LENGTH + 1);
373 | int nameLength = 0;
374 | while (scanner.getByteAt(nameAddress + nameLength) != ';') {
375 | nameLength++;
376 | }
377 | byte[] array = new byte[nameLength];
378 | for (int i = 0; i < nameLength; ++i) {
379 | array[i] = scanner.getByteAt(nameAddress + i);
380 | }
381 | return new String(array, java.nio.charset.StandardCharsets.UTF_8);
382 | }
383 | }
384 |
385 | private static final class Scanner {
386 | private static final sun.misc.Unsafe UNSAFE = initUnsafe();
387 | private long pos;
388 | private final long end;
389 |
390 | private static sun.misc.Unsafe initUnsafe() {
391 | try {
392 | java.lang.reflect.Field theUnsafe = sun.misc.Unsafe.class.getDeclaredField("theUnsafe");
393 | theUnsafe.setAccessible(true);
394 | return (sun.misc.Unsafe) theUnsafe.get(sun.misc.Unsafe.class);
395 | }
396 | catch (NoSuchFieldException | IllegalAccessException e) {
397 | throw new RuntimeException(e);
398 | }
399 | }
400 |
401 | public Scanner(long start, long end) {
402 | this.pos = start;
403 | this.end = end;
404 | }
405 |
406 | boolean hasNext() {
407 | return pos < end;
408 | }
409 |
410 | long pos() {
411 | return pos;
412 | }
413 |
414 | void add(long delta) {
415 | pos += delta;
416 | }
417 |
418 | long getLong() {
419 | return UNSAFE.getLong(pos);
420 | }
421 |
422 | long getLongAt(long pos) {
423 | return UNSAFE.getLong(pos);
424 | }
425 |
426 | byte getByteAt(long pos) {
427 | return UNSAFE.getByte(pos);
428 | }
429 | }
430 | }
431 |
--------------------------------------------------------------------------------
/src/main/java/org/rschwietzke/FastRandom.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2023 The original authors
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | package org.rschwietzke;
17 |
/**
 * Ultra-fast pseudo random generator that is not synchronized!
 * Don't use anything from Random by inheritance, this will inherit
 * a volatile! Not my idea, copied in parts from some demo random
 * generator lessons.
 *
 * <p>The state is advanced with three xorshift steps (21, 35, 4). Instances are not thread-safe.
 *
 * @author rschwietzke
 *
 */
public class FastRandom {
    /**
     * State used instead of a zero seed. Zero is a fixed point of the xorshift steps,
     * so a generator seeded with 0 would return 0 forever.
     */
    private static final long ZERO_SEED_REPLACEMENT = 0x9E3779B97F4A7C15L;

    private long seed;

    /** Creates a generator seeded with the current time in milliseconds. */
    public FastRandom() {
        this(System.currentTimeMillis());
    }

    /**
     * Creates a generator with the given seed.
     *
     * @param seed initial state; 0 is replaced by a fixed non-zero constant
     */
    public FastRandom(long seed) {
        this.seed = seed == 0 ? ZERO_SEED_REPLACEMENT : seed;
    }

    /**
     * Advances the state and returns its low {@code nbits} bits.
     *
     * @param nbits number of low-order bits to keep (this class passes 31 or 32)
     * @return the low {@code nbits} bits of the new state
     */
    protected int next(int nbits) {
        // N.B. Not thread-safe!
        long x = this.seed;
        x ^= (x << 21);
        x ^= (x >>> 35);
        x ^= (x << 4);
        this.seed = x;

        x &= ((1L << nbits) - 1);

        return (int) x;
    }

    /**
     * Borrowed from the JDK
     *
     * @param bound exclusive upper bound; expected to be positive (not checked)
     * @return a value between 0 (inclusive) and {@code bound} (exclusive)
     */
    public int nextInt(int bound) {
        int r = next(31);
        int m = bound - 1;
        if ((bound & m) == 0) { // i.e., bound is a power of 2
            r = (int) ((bound * (long) r) >> 31);
        }
        else {
            // Reject draws from the incomplete top range (detected via int overflow) to avoid modulo bias.
            for (int u = r; u - (r = u % bound) + m < 0; u = next(31)) {
                // retry
            }
        }
        return r;
    }

    /**
     * Borrowed from the JDK
     *
     * @return the low 32 bits of the next state as an int
     */
    public int nextInt() {
        return next(32);
    }
}
77 |
--------------------------------------------------------------------------------
/src/main/resources/.dontdelete:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomaswue/1brc-steps/886def4efcd7c4bd76fef441aa16f5b1a260054f/src/main/resources/.dontdelete
--------------------------------------------------------------------------------
/src/test/resources/.dontdelete:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thomaswue/1brc-steps/886def4efcd7c4bd76fef441aa16f5b1a260054f/src/test/resources/.dontdelete
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-1.out:
--------------------------------------------------------------------------------
1 | {Kunming=19.8/19.8/19.8}
2 |
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-1.txt:
--------------------------------------------------------------------------------
1 | Kunming;19.8
2 |
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-10.out:
--------------------------------------------------------------------------------
1 | {Adelaide=15.0/15.0/15.0, Cabo San Lucas=14.9/14.9/14.9, Dodoma=22.2/22.2/22.2, Halifax=12.9/12.9/12.9, Karachi=15.4/15.4/15.4, Pittsburgh=9.7/9.7/9.7, Ségou=25.7/25.7/25.7, Tauranga=38.2/38.2/38.2, Xi'an=24.2/24.2/24.2, Zagreb=12.2/12.2/12.2}
2 |
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-10.txt:
--------------------------------------------------------------------------------
1 | Halifax;12.9
2 | Zagreb;12.2
3 | Cabo San Lucas;14.9
4 | Adelaide;15.0
5 | Ségou;25.7
6 | Pittsburgh;9.7
7 | Karachi;15.4
8 | Xi'an;24.2
9 | Dodoma;22.2
10 | Tauranga;38.2
11 |
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-2.out:
--------------------------------------------------------------------------------
1 | {Bosaso=19.2/19.2/19.2, Petropavlovsk-Kamchatsky=9.5/9.5/9.5}
2 |
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-2.txt:
--------------------------------------------------------------------------------
1 | Bosaso;19.2
2 | Petropavlovsk-Kamchatsky;9.5
3 |
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-20.out:
--------------------------------------------------------------------------------
1 | {Abéché1️⃣🐝🏎️=27.3/27.3/27.3, Almaty1️⃣🐝🏎️=15.3/15.3/15.3, Baghdad1️⃣🐝🏎️=26.0/26.0/26.0, Bangkok1️⃣🐝🏎️=25.6/25.6/25.6, Berlin1️⃣🐝🏎️=-0.3/-0.3/-0.3, Birao1️⃣🐝🏎️=33.5/33.5/33.5, Canberra1️⃣🐝🏎️=5.2/5.2/5.2, Chittagong1️⃣🐝🏎️=12.6/12.6/12.6, Da Nang1️⃣🐝🏎️=33.7/33.7/33.7, Edinburgh1️⃣🐝🏎️=19.8/19.8/19.8, Irkutsk1️⃣🐝🏎️=9.9/9.9/9.9, Lhasa1️⃣🐝🏎️=13.4/13.4/13.4, Lyon1️⃣🐝🏎️=1.8/1.8/1.8, Mogadishu1️⃣🐝🏎️=11.5/11.5/11.5, Nashville1️⃣🐝🏎️=-4.9/-4.9/-4.9, Odesa1️⃣🐝🏎️=6.5/6.5/6.5, Parakou1️⃣🐝🏎️=36.3/36.3/36.3, Tamanrasset1️⃣🐝🏎️=17.9/17.9/17.9, Tirana1️⃣🐝🏎️=27.7/27.7/27.7, Xi'an1️⃣🐝🏎️=17.5/17.5/17.5}
2 |
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-20.txt:
--------------------------------------------------------------------------------
1 | Odesa1️⃣🐝🏎️;6.5
2 | Canberra1️⃣🐝🏎️;5.2
3 | Lhasa1️⃣🐝🏎️;13.4
4 | Edinburgh1️⃣🐝🏎️;19.8
5 | Da Nang1️⃣🐝🏎️;33.7
6 | Xi'an1️⃣🐝🏎️;17.5
7 | Berlin1️⃣🐝🏎️;-0.3
8 | Tamanrasset1️⃣🐝🏎️;17.9
9 | Abéché1️⃣🐝🏎️;27.3
10 | Baghdad1️⃣🐝🏎️;26.0
11 | Lyon1️⃣🐝🏎️;1.8
12 | Mogadishu1️⃣🐝🏎️;11.5
13 | Bangkok1️⃣🐝🏎️;25.6
14 | Irkutsk1️⃣🐝🏎️;9.9
15 | Parakou1️⃣🐝🏎️;36.3
16 | Almaty1️⃣🐝🏎️;15.3
17 | Birao1️⃣🐝🏎️;33.5
18 | Chittagong1️⃣🐝🏎️;12.6
19 | Tirana1️⃣🐝🏎️;27.7
20 | Nashville1️⃣🐝🏎️;-4.9
21 |
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-3.out:
--------------------------------------------------------------------------------
1 | {Bosaso=-15.0/1.3/20.0, Petropavlovsk-Kamchatsky=-9.5/0.0/9.5}
2 |
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-3.txt:
--------------------------------------------------------------------------------
1 | Bosaso;5.0
2 | Bosaso;20.0
3 | Bosaso;-5.0
4 | Bosaso;-15.0
5 | Petropavlovsk-Kamchatsky;9.5
6 | Petropavlovsk-Kamchatsky;-9.5
7 |
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-boundaries.out:
--------------------------------------------------------------------------------
1 | {Bosaso=-99.9/-99.9/-99.9, Petropavlovsk-Kamchatsky=99.9/99.9/99.9}
2 |
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-boundaries.txt:
--------------------------------------------------------------------------------
1 | Bosaso;-99.9
2 | Petropavlovsk-Kamchatsky;99.9
3 |
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-complex-utf8.out:
--------------------------------------------------------------------------------
1 | {B=8.9/8.9/8.9, C=38.9/38.9/38.9, CabindaKermānZunhuaRochesterValenzuelaOrūmīyehWugangShuangqiaoTshikapa=3.0/3.0/3.0, ChesterLobnyaSan LeandroHemeiSolweziGrand BourgKaliboS=23.4/23.4/23.4, MirnaPehčevoRopažiGus=16.7/16.7/16.7, PototanSahuayo de MorelosBambergMosigkauFrancisco BeltrãoJelenia GóraTelêmaco Borb=17.5/17.5/17.5, TanjungpinangKasselHaldiaLuxorLạng SơnAt TājīTaraka=10.6/10.6/10.6, aniCartagoEṭ ṬīraTemerinCormeilles-en-ParisisZawyat ech CheïkhS=25.4/25.4/25.4, burgazAl ḨawīyahSalamancaMbanza KongoNchelengeZhangaözenTurbatMatiMangghystaūMalak=21.5/21.5/21.5, cotánSan Ramón de la Nueva OránWausauGbaweTailaiRochester HillsVilla ElisaToba TekS=11.2/11.2/11.2, eLafayetteAsh Shaţ=14.2/14.2/14.2, en IslandKota BharuCiudad López MateosCelayaVinhDuyunLos Mochis‘AjmānNyalaLarkanaWichitaNishi=11.9/11.9/11.9, epé=28.2/28.2/28.2, hanVarkkallaiPort LokoD=10.9/10.9/10.9, iCoahuitlánRabatJahāngīrpur SālkhaniCamUniversity of California-Santa BarbaraSerravalleTelkathuM=13.4/13.4/13.4, igButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkomunGornji PetrovciRibnicaKon TumŠavnikPoul=22.5/22.5/22.5, igButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkopunGornji PetrovciRibnicaKon TumŠavnikPodl=11.5/11.5/11.5, igButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkopunGornji PetrovciRibnicaKon TumŠavnikPoul=18.5/18.5/18.5, inhoSökeDordrechtPoáLaloG=13.1/13.1/13.1, iudad Melchor MúzquizQuinhámelDa=40.5/40.5/40.5, ixButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkomunGornji PetrovciRibnicaKon TumŠavnikPoul=0.1/0.1/0.1, l ‘=14.6/14.6/14.6, lhuleuTacurongNavapolatskPiscoDera Ismail KhanLabéAltamiraCavite CityYevpatoriiaTait=22.8/22.8/22.8, liLoretoPlacentiaAliso ViejoChomaPen-y-Bont ar OgwrCojutepeque=12.4/12.4/12.4, lioúpoliBarahonaHoPhuketLe BardoBuena ParkKayesChampigny-sur-MarneHaskovoChathamBatleyEsteioRe=22.5/22.5/22.5, m el Bo=14.6/14.6/14.6, mazunchaleZrenjaninFouchanaSurtPanč=6.7/6.7/6.7, ngoDübendorfC=11.7/11.7/11.7, nt-A=9.2/9.2/9.2, ntington StationKampong 
SpeuKakataMoschátoBressoVentspilsSaint-CloudTamboSidi Smai’ilDandenon=14.6/14.6/14.6, oCanagatanHelsinkiJabalpurProvidenceRuchengNizhniy NovgorodAhvāzJeparaShaoyangComayagüe=17.3/17.3/17.3, oGumlāSamā’=14.9/14.9/14.9, os Reyes de SalgadoCinisello BalsamoKashibaH=20.0/20.0/20.0, picuíbaJhang CityTepicJayapuraRio BrancoToyamaFangtingSanandajDelhi CantonmentLinghaiShorāpurToy=13.0/13.0/13.0, raKielSibuYatoParanáSanta ClaraYamagataKatihārBeykozImperat=13.5/13.5/13.5, rhamDera Ghazi KhanMiyazakiBhātpār=21.3/21.3/21.3, rugarhVerāvalAlagoinhasEdremitBandırmaSalavatGandajikaLucapaLeesburgTamaRas Tan=10.9/10.9/10.9, skişeh=12.9/12.9/12.9, venGaopingDunhuaAz Zarqā’SylhetKaihuaCaerdyddJāmnagarFuyuanGayaFlorianópolisC=1.9/1.9/1.9, y-le-MoutierSant’ArpinoPljevljaRo=0.8/0.8/0.8, ça PaulistaDarmstadtZhengdingPindamonhangabaEnschedeGirónUttarpāraHeidelbergK=6.0/6.0/6.0, üSosnowiecTanauanMya=18.4/18.4/18.4, ālSongnimSanto TomasKoiduHoshangābādOpoleNovocheboksarskArarasKhannaPunoKoforiduaAhmadpur E=19.4/19.4/19.4, āng=15.7/15.7/15.7, ġFis=9.6/9.6/9.6, ‘AqabahPembaNowgongQu=12.9/12.9/12.9}
2 |
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-complex-utf8.txt:
--------------------------------------------------------------------------------
1 | aniCartagoEṭ ṬīraTemerinCormeilles-en-ParisisZawyat ech CheïkhS;25.4
2 | picuíbaJhang CityTepicJayapuraRio BrancoToyamaFangtingSanandajDelhi CantonmentLinghaiShorāpurToy;13.0
3 | lhuleuTacurongNavapolatskPiscoDera Ismail KhanLabéAltamiraCavite CityYevpatoriiaTait;22.8
4 | āng;15.7
5 | hanVarkkallaiPort LokoD;10.9
6 | eLafayetteAsh Shaţ;14.2
7 | ‘AqabahPembaNowgongQu;12.9
8 | inhoSökeDordrechtPoáLaloG;13.1
9 | skişeh;12.9
10 | rhamDera Ghazi KhanMiyazakiBhātpār;21.3
11 | igButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkopunGornji PetrovciRibnicaKon TumŠavnikPodl;11.5
12 | igButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkopunGornji PetrovciRibnicaKon TumŠavnikPoul;18.5
13 | igButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkomunGornji PetrovciRibnicaKon TumŠavnikPoul;22.5
14 | ixButeboJuršinciKoaniImdinaNova VasDestrnikVarvarinSkomunGornji PetrovciRibnicaKon TumŠavnikPoul;0.1
15 | B;8.9
16 | C;38.9
17 | nt-A;9.2
18 | y-le-MoutierSant’ArpinoPljevljaRo;0.8
19 | oGumlāSamā’;14.9
20 | os Reyes de SalgadoCinisello BalsamoKashibaH;20.0
21 | m el Bo;14.6
22 | mazunchaleZrenjaninFouchanaSurtPanč;6.7
23 | ġFis;9.6
24 | epé;28.2
25 | ālSongnimSanto TomasKoiduHoshangābādOpoleNovocheboksarskArarasKhannaPunoKoforiduaAhmadpur E;19.4
26 | iudad Melchor MúzquizQuinhámelDa;40.5
27 | ChesterLobnyaSan LeandroHemeiSolweziGrand BourgKaliboS;23.4
28 | cotánSan Ramón de la Nueva OránWausauGbaweTailaiRochester HillsVilla ElisaToba TekS;11.2
29 | raKielSibuYatoParanáSanta ClaraYamagataKatihārBeykozImperat;13.5
30 | l ‘;14.6
31 | TanjungpinangKasselHaldiaLuxorLạng SơnAt TājīTaraka;10.6
32 | MirnaPehčevoRopažiGus;16.7
33 | üSosnowiecTanauanMya;18.4
34 | ngoDübendorfC;11.7
35 | liLoretoPlacentiaAliso ViejoChomaPen-y-Bont ar OgwrCojutepeque;12.4
36 | burgazAl ḨawīyahSalamancaMbanza KongoNchelengeZhangaözenTurbatMatiMangghystaūMalak;21.5
37 | iCoahuitlánRabatJahāngīrpur SālkhaniCamUniversity of California-Santa BarbaraSerravalleTelkathuM;13.4
38 | lioúpoliBarahonaHoPhuketLe BardoBuena ParkKayesChampigny-sur-MarneHaskovoChathamBatleyEsteioRe;22.5
39 | PototanSahuayo de MorelosBambergMosigkauFrancisco BeltrãoJelenia GóraTelêmaco Borb;17.5
40 | CabindaKermānZunhuaRochesterValenzuelaOrūmīyehWugangShuangqiaoTshikapa;3.0
41 | venGaopingDunhuaAz Zarqā’SylhetKaihuaCaerdyddJāmnagarFuyuanGayaFlorianópolisC;1.9
42 | ntington StationKampong SpeuKakataMoschátoBressoVentspilsSaint-CloudTamboSidi Smai’ilDandenon;14.6
43 | rugarhVerāvalAlagoinhasEdremitBandırmaSalavatGandajikaLucapaLeesburgTamaRas Tan;10.9
44 | oCanagatanHelsinkiJabalpurProvidenceRuchengNizhniy NovgorodAhvāzJeparaShaoyangComayagüe;17.3
45 | ça PaulistaDarmstadtZhengdingPindamonhangabaEnschedeGirónUttarpāraHeidelbergK;6.0
46 | en IslandKota BharuCiudad López MateosCelayaVinhDuyunLos Mochis‘AjmānNyalaLarkanaWichitaNishi;11.9
47 |
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-dot.out:
--------------------------------------------------------------------------------
1 | {-=1.0/1.5/2.0, .=1.0/1.0/1.0}
2 |
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-dot.txt:
--------------------------------------------------------------------------------
1 | .;1.0
2 | -;1.0
3 | -;2.0
4 |
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-rounding.out:
--------------------------------------------------------------------------------
1 | {ham=14.6/25.5/33.6, jel=-9.0/18.0/46.5}
2 |
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-short.out:
--------------------------------------------------------------------------------
1 | {a=1.0/1.0/1.0, b=1.0/1.5/2.0}
2 |
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-short.txt:
--------------------------------------------------------------------------------
1 | a;1.0
2 | b;1.0
3 | b;2.0
4 |
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-shortest.out:
--------------------------------------------------------------------------------
1 | {a=1.0/1.0/1.0}
2 |
--------------------------------------------------------------------------------
/src/test/resources/samples/measurements-shortest.txt:
--------------------------------------------------------------------------------
1 | a;1.0
2 |
--------------------------------------------------------------------------------