├── .gitignore
├── 1brc.awk
├── LICENSE.txt
├── README.md
├── go.mod
├── main.go
├── r1.go
├── r10.go
├── r2.go
├── r3.go
├── r4.go
├── r5.go
├── r6.go
├── r7.go
├── r8.go
├── r8_test.go
├── r9.go
└── testdata
    └── split.txt


/.gitignore:
--------------------------------------------------------------------------------
1 | measurements*.txt
2 | measurements*.out
3 | cpu.prof
4 | default.pgo
5 | go-1brc
6 | 


--------------------------------------------------------------------------------
/1brc.awk:
--------------------------------------------------------------------------------
 1 | BEGIN {
 2 | 	FS = ";"
 3 | }
 4 | 
 5 | {
 6 | 	if (counts[$1]++) {
 7 | 		mins[$1] = $2 < mins[$1] ? $2 : mins[$1]
 8 | 		maxs[$1] = $2 > maxs[$1] ? $2 : maxs[$1]
 9 | 	} else {
10 | 		mins[$1] = maxs[$1] = $2  # new entry
11 | 	}
12 | 	sums[$1] += $2
13 | }
14 | 
15 | END {
16 | 	printf "{"
17 | 	n = asorti(mins, sorted)
18 |     for (i = 1; i <= n; i++) {
19 |     	station = sorted[i]
20 | 		min = mins[station]
21 | 		max = maxs[station]
22 | 		mean = sums[station] / counts[station]
23 | 		printf "%s=%.1f/%.1f/%.1f", station, min, mean, max
24 | 		if (i < n) {
25 | 			printf ", "
26 | 		}
27 | 	}
28 | 	printf "}\n"
29 | }
30 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Ben Hoyt
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | # The One Billion Row Challenge in Go: from 1m45s to 4s
3 | 
4 | These are my progressively-faster solutions to the One Billion Row Challenge in Go: from a simple unoptimised version (r1.go) that takes 1 minute 45 seconds, to an optimised and parallelised version (r9.go) that takes 4 seconds.
5 | 
6 | [Read the article](https://benhoyt.com/writings/go-1brc/) for a detailed write-up and results.
7 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/benhoyt/go-1brc
2 | 
3 | go 1.21.0
4 | 


--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"bufio"
  5 | 	"bytes"
  6 | 	"flag"
  7 | 	"fmt"
  8 | 	"io"
  9 | 	"math"
 10 | 	"os"
 11 | 	"runtime"
 12 | 	"runtime/pprof"
 13 | 	"time"
 14 | )
 15 | 
 16 | type revisionFunc func(string, io.Writer) error
 17 | 
 18 | var revisionFuncs = []revisionFunc{r1, r2, r3, r4, r5, r6, r7, r8, r9, r10}
 19 | 
 20 | var maxGoroutines int
 21 | 
 22 | func main() {
 23 | 	var (
 24 | 		cpuProfile = flag.String("cpuprofile", "", "write CPU profile to file")
 25 | 		revision   = flag.Int("revision", len(revisionFuncs), "revision of solution to run")
 26 | 		goroutines = flag.Int("goroutines", 0, "num goroutines for parallel solutions (default NumCPU)")
 27 | 		benchAll   = flag.Bool("benchall", false, "benchmark all solutions")
 28 | 	)
 29 | 	flag.Usage = func() {
 30 | 		fmt.Fprintf(flag.CommandLine.Output(),
 31 | 			"Usage: go-r1bc [-cpuprofile=PROFILE] [-revision=N] INPUTFILE\n")
 32 | 		flag.PrintDefaults()
 33 | 	}
 34 | 	flag.Parse()
 35 | 
 36 | 	if *revision < 1 || *revision > len(revisionFuncs) {
 37 | 		fmt.Fprintf(os.Stderr, "invalid revision %d\n", *revision)
 38 | 		os.Exit(1)
 39 | 	}
 40 | 	maxGoroutines = *goroutines
 41 | 	if maxGoroutines == 0 {
 42 | 		maxGoroutines = runtime.NumCPU()
 43 | 	}
 44 | 
 45 | 	args := flag.Args()
 46 | 	if len(args) < 1 {
 47 | 		flag.Usage()
 48 | 		os.Exit(2)
 49 | 	}
 50 | 	inputPath := args[0]
 51 | 
 52 | 	st, err := os.Stat(inputPath)
 53 | 	if err != nil {
 54 | 		fmt.Fprintf(os.Stderr, "error: %v\n", err)
 55 | 		os.Exit(1)
 56 | 	}
 57 | 	size := st.Size()
 58 | 
 59 | 	if *cpuProfile != "" {
 60 | 		f, err := os.Create(*cpuProfile)
 61 | 		if err != nil {
 62 | 			fmt.Fprintf(os.Stderr, "error: %v\n", err)
 63 | 			os.Exit(1)
 64 | 		}
 65 | 		pprof.StartCPUProfile(f)
 66 | 		defer pprof.StopCPUProfile()
 67 | 	}
 68 | 
 69 | 	if *benchAll {
 70 | 		err := benchmarkAll(inputPath)
 71 | 		if err != nil {
 72 | 			fmt.Fprintf(os.Stderr, "error: %v\n", err)
 73 | 			os.Exit(1)
 74 | 		}
 75 | 		return
 76 | 	}
 77 | 
 78 | 	start := time.Now()
 79 | 	output := bufio.NewWriter(os.Stdout)
 80 | 
 81 | 	rf := revisionFuncs[*revision-1]
 82 | 	err = rf(inputPath, output)
 83 | 	if err != nil {
 84 | 		fmt.Fprintf(os.Stderr, "error: %v\n", err)
 85 | 		os.Exit(1)
 86 | 	}
 87 | 
 88 | 	output.Flush()
 89 | 	elapsed := time.Since(start)
 90 | 	fmt.Fprintf(os.Stderr, "Processed %.1fMB in %s\n",
 91 | 		float64(size)/(1024*1024), elapsed)
 92 | }
 93 | 
 94 | func benchmarkAll(inputPath string) error {
 95 | 	const tries = 5
 96 | 
 97 | 	var buf bytes.Buffer
 98 | 	err := r1(inputPath, &buf)
 99 | 	if err != nil {
100 | 		return err
101 | 	}
102 | 	expected := buf.String()
103 | 
104 | 	var r1Best time.Duration
105 | 
106 | 	for i, rf := range revisionFuncs {
107 | 		fmt.Fprintf(os.Stderr, "r%d: ", i+1)
108 | 		bestTime := time.Duration(math.MaxInt64)
109 | 		for try := 0; try < tries; try++ {
110 | 			var output bytes.Buffer
111 | 			start := time.Now()
112 | 			err := rf(inputPath, &output)
113 | 			if err != nil {
114 | 				return err
115 | 			}
116 | 			elapsed := time.Since(start)
117 | 			fmt.Fprintf(os.Stderr, "%v ", elapsed)
118 | 			bestTime = min(bestTime, elapsed)
119 | 			if i == 0 {
120 | 				r1Best = bestTime
121 | 			}
122 | 
123 | 			if output.String() != expected {
124 | 				return fmt.Errorf("r%d didn't give correct result", i+1)
125 | 			}
126 | 		}
127 | 		fmt.Fprintf(os.Stderr, "- best: %v (%.2fx as fast as r1)\n",
128 | 			bestTime, float64(r1Best)/float64(bestTime))
129 | 	}
130 | 	return nil
131 | }
132 | 


--------------------------------------------------------------------------------
/r1.go:
--------------------------------------------------------------------------------
 1 | // r1: simple, idiomatic Go using bufio.Scanner and strconv.ParseFloat
 2 | //
 3 | // ~1.004s for 10M rows
 4 | 
 5 | package main
 6 | 
 7 | import (
 8 | 	"bufio"
 9 | 	"fmt"
10 | 	"io"
11 | 	"os"
12 | 	"sort"
13 | 	"strconv"
14 | 	"strings"
15 | )
16 | 
17 | func r1(inputPath string, output io.Writer) error {
18 | 	type stats struct {
19 | 		min, max, sum float64
20 | 		count         int64
21 | 	}
22 | 
23 | 	f, err := os.Open(inputPath)
24 | 	if err != nil {
25 | 		return err
26 | 	}
27 | 	defer f.Close()
28 | 
29 | 	stationStats := make(map[string]stats)
30 | 
31 | 	scanner := bufio.NewScanner(f)
32 | 	for scanner.Scan() {
33 | 		line := scanner.Text()
34 | 		station, tempStr, hasSemi := strings.Cut(line, ";")
35 | 		if !hasSemi {
36 | 			continue
37 | 		}
38 | 
39 | 		temp, err := strconv.ParseFloat(tempStr, 64)
40 | 		if err != nil {
41 | 			return err
42 | 		}
43 | 
44 | 		s, ok := stationStats[station]
45 | 		if !ok {
46 | 			s.min = temp
47 | 			s.max = temp
48 | 			s.sum = temp
49 | 			s.count = 1
50 | 		} else {
51 | 			s.min = min(s.min, temp)
52 | 			s.max = max(s.max, temp)
53 | 			s.sum += temp
54 | 			s.count++
55 | 		}
56 | 		stationStats[station] = s
57 | 	}
58 | 
59 | 	stations := make([]string, 0, len(stationStats))
60 | 	for station := range stationStats {
61 | 		stations = append(stations, station)
62 | 	}
63 | 	sort.Strings(stations)
64 | 
65 | 	fmt.Fprint(output, "{")
66 | 	for i, station := range stations {
67 | 		if i > 0 {
68 | 			fmt.Fprint(output, ", ")
69 | 		}
70 | 		s := stationStats[station]
71 | 		mean := s.sum / float64(s.count)
72 | 		fmt.Fprintf(output, "%s=%.1f/%.1f/%.1f", station, s.min, mean, s.max)
73 | 	}
74 | 	fmt.Fprint(output, "}\n")
75 | 	return nil
76 | }
77 | 


--------------------------------------------------------------------------------
/r10.go:
--------------------------------------------------------------------------------
  1 | // r10: all the previous optimizations plus faster semicolon finding and
  2 | // hashing
  3 | //
  4 | // Translated from Java by Menno Finlay-Smits Ideas with ideas taken from
  5 | // this fast Java solution:
  6 | //
  7 | // https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_mtopolnik.java
  8 | //
  9 | // On my (Ben's) laptop I get these initial results:
 10 | //
 11 | // $ ./go-1brc -revision=1 ../1brc/data/measurements.txt >out-r1
 12 | // Processed 13156.2MB in 1m39.507011009s
 13 | // $ ./go-1brc -revision=9 ../1brc/data/measurements.txt >out-r9
 14 | // Processed 13156.2MB in 2.893693843s  # 34.4x as fast as the r1 above
 15 | // $ ./go-1brc -revision=10 ../1brc/data/measurements.txt >out-r10
 16 | // Processed 13156.2MB in 2.497241029s  # 39.8x as fast as the r1 above
 17 | 
 18 | package main
 19 | 
 20 | import (
 21 | 	"bytes"
 22 | 	"encoding/binary"
 23 | 	"fmt"
 24 | 	"io"
 25 | 	"math/bits"
 26 | 	"os"
 27 | 	"sort"
 28 | )
 29 | 
 30 | const BroadcastSemicolon = 0x3B3B3B3B3B3B3B3B
 31 | const Broadcast0x01 = 0x0101010101010101
 32 | const Broadcast0x80 = 0x8080808080808080
 33 | 
 34 | type r10Stats struct {
 35 | 	min, max, count int32
 36 | 	sum             int64
 37 | }
 38 | 
 39 | func r10(inputPath string, output io.Writer) error {
 40 | 	parts, err := splitFile(inputPath, maxGoroutines)
 41 | 	if err != nil {
 42 | 		return err
 43 | 	}
 44 | 
 45 | 	resultsCh := make(chan map[string]*r10Stats)
 46 | 	for _, part := range parts {
 47 | 		go r10ProcessPart(inputPath, part.offset, part.size, resultsCh)
 48 | 	}
 49 | 
 50 | 	totals := make(map[string]*r10Stats)
 51 | 	for i := 0; i < len(parts); i++ {
 52 | 		result := <-resultsCh
 53 | 		for station, s := range result {
 54 | 			ts := totals[station]
 55 | 			if ts == nil {
 56 | 				totals[station] = s
 57 | 				continue
 58 | 			}
 59 | 			ts.min = min(ts.min, s.min)
 60 | 			ts.max = max(ts.max, s.max)
 61 | 			ts.sum += s.sum
 62 | 			ts.count += s.count
 63 | 		}
 64 | 	}
 65 | 
 66 | 	stations := make([]string, 0, len(totals))
 67 | 	for station := range totals {
 68 | 		stations = append(stations, station)
 69 | 	}
 70 | 	sort.Strings(stations)
 71 | 
 72 | 	fmt.Fprint(output, "{")
 73 | 	for i, station := range stations {
 74 | 		if i > 0 {
 75 | 			fmt.Fprint(output, ", ")
 76 | 		}
 77 | 		s := totals[station]
 78 | 		mean := float64(s.sum) / float64(s.count) / 10
 79 | 		fmt.Fprintf(output, "%s=%.1f/%.1f/%.1f", station, float64(s.min)/10, mean, float64(s.max)/10)
 80 | 	}
 81 | 	fmt.Fprint(output, "}\n")
 82 | 
 83 | 	return nil
 84 | }
 85 | 
 86 | func r10ProcessPart(inputPath string, fileOffset, fileSize int64, resultsCh chan map[string]*r10Stats) {
 87 | 	file, err := os.Open(inputPath)
 88 | 	if err != nil {
 89 | 		panic(err)
 90 | 	}
 91 | 	defer file.Close()
 92 | 	_, err = file.Seek(fileOffset, io.SeekStart)
 93 | 	if err != nil {
 94 | 		panic(err)
 95 | 	}
 96 | 	f := io.LimitedReader{R: file, N: fileSize}
 97 | 
 98 | 	type item struct {
 99 | 		key  []byte
100 | 		stat *r10Stats
101 | 	}
102 | 	const numBuckets = 1 << 17        // number of hash buckets (power of 2)
103 | 	items := make([]item, numBuckets) // hash buckets, linearly probed
104 | 	size := 0                         // number of active items in items slice
105 | 
106 | 	buf := make([]byte, 1024*1024)
107 | 	readStart := 0
108 | 	for {
109 | 		n, err := f.Read(buf[readStart:])
110 | 		if err != nil && err != io.EOF {
111 | 			panic(err)
112 | 		}
113 | 		if readStart+n == 0 {
114 | 			break
115 | 		}
116 | 		chunk := buf[:readStart+n]
117 | 
118 | 		newline := bytes.LastIndexByte(chunk, '\n')
119 | 		if newline < 0 {
120 | 			break
121 | 		}
122 | 		remaining := chunk[newline+1:]
123 | 		chunk = chunk[:newline+1]
124 | 
125 | 	chunkLoop:
126 | 		for {
127 | 			var hash uint64
128 | 			var station, after []byte
129 | 
130 | 			if len(chunk) < 8 {
131 | 				break chunkLoop
132 | 			}
133 | 
134 | 			nameWord0 := binary.NativeEndian.Uint64(chunk)
135 | 			matchBits := semicolonMatchBits(nameWord0)
136 | 			if matchBits != 0 {
137 | 				// semicolon is in the first 8 bytes
138 | 				nameLen := calcNameLen(matchBits)
139 | 				nameWord0 = maskWord(nameWord0, matchBits)
140 | 				station = chunk[:nameLen]
141 | 				after = chunk[nameLen+1:]
142 | 				hash = calcHash(nameWord0)
143 | 			} else {
144 | 				// station name is longer so keep looking for the semicolon in
145 | 				// uint64 chunks
146 | 				nameLen := 8
147 | 				hash = calcHash(nameWord0)
148 | 				for {
149 | 					if nameLen > len(chunk)-8 {
150 | 						break chunkLoop
151 | 					}
152 | 					lastNameWord := binary.NativeEndian.Uint64(chunk[nameLen:])
153 | 					matchBits = semicolonMatchBits(lastNameWord)
154 | 					if matchBits != 0 {
155 | 						nameLen += calcNameLen(matchBits)
156 | 						station = chunk[:nameLen]
157 | 						after = chunk[nameLen+1:]
158 | 						break
159 | 					}
160 | 					nameLen += 8
161 | 				}
162 | 			}
163 | 			index := 0
164 | 			negative := false
165 | 			if after[index] == '-' {
166 | 				negative = true
167 | 				index++
168 | 			}
169 | 			temp := int32(after[index] - '0')
170 | 			index++
171 | 			if after[index] != '.' {
172 | 				temp = temp*10 + int32(after[index]-'0')
173 | 				index++
174 | 			}
175 | 			index++ // skip '.'
176 | 			temp = temp*10 + int32(after[index]-'0')
177 | 			index += 2 // skip last digit and '\n'
178 | 			if negative {
179 | 				temp = -temp
180 | 			}
181 | 			chunk = after[index:]
182 | 
183 | 			hashIndex := int(hash & (numBuckets - 1))
184 | 			for {
185 | 				if items[hashIndex].key == nil {
186 | 					// Found empty slot, add new item (copying key).
187 | 					key := make([]byte, len(station))
188 | 					copy(key, station)
189 | 					items[hashIndex] = item{
190 | 						key: key,
191 | 						stat: &r10Stats{
192 | 							min:   temp,
193 | 							max:   temp,
194 | 							sum:   int64(temp),
195 | 							count: 1,
196 | 						},
197 | 					}
198 | 					size++
199 | 					if size > numBuckets/2 {
200 | 						panic("too many items in hash table")
201 | 					}
202 | 					break
203 | 				}
204 | 				if bytes.Equal(items[hashIndex].key, station) {
205 | 					// Found matching slot, add to existing stats.
206 | 					s := items[hashIndex].stat
207 | 					s.min = min(s.min, temp)
208 | 					s.max = max(s.max, temp)
209 | 					s.sum += int64(temp)
210 | 					s.count++
211 | 					break
212 | 				}
213 | 				// Slot already holds another key, try next slot (linear probe).
214 | 				hashIndex++
215 | 				if hashIndex >= numBuckets {
216 | 					hashIndex = 0
217 | 				}
218 | 			}
219 | 		}
220 | 
221 | 		readStart = copy(buf, remaining)
222 | 	}
223 | 
224 | 	result := make(map[string]*r10Stats, size)
225 | 	for _, item := range items {
226 | 		if item.key == nil {
227 | 			continue
228 | 		}
229 | 		result[string(item.key)] = item.stat
230 | 	}
231 | 	resultsCh <- result
232 | }
233 | 
234 | func calcNameLen(b uint64) int {
235 | 	return (bits.TrailingZeros64(b) >> 3)
236 | }
237 | 
238 | func calcHash(word uint64) uint64 {
239 | 	return bits.RotateLeft64(word*0x51_7c_c1_b7_27_22_0a_95, 17)
240 | }
241 | 
242 | func semicolonMatchBits(word uint64) uint64 {
243 | 	diff := word ^ BroadcastSemicolon
244 | 	return (diff - Broadcast0x01) & (^diff & Broadcast0x80)
245 | }
246 | 
247 | func maskWord(word, matchBits uint64) uint64 {
248 | 	mask := matchBits ^ (matchBits - 1)
249 | 	return word & mask
250 | }
251 | 


--------------------------------------------------------------------------------
/r2.go:
--------------------------------------------------------------------------------
 1 | // r2: use stats pointer as map value to avoid double hashing
 2 | //
 3 | // ~921ms for 10M rows (1.09x as fast as r1)
 4 | 
 5 | package main
 6 | 
 7 | import (
 8 | 	"bufio"
 9 | 	"fmt"
10 | 	"io"
11 | 	"os"
12 | 	"sort"
13 | 	"strconv"
14 | 	"strings"
15 | )
16 | 
17 | func r2(inputPath string, output io.Writer) error {
18 | 	type stats struct {
19 | 		min, max, sum float64
20 | 		count         int64
21 | 	}
22 | 
23 | 	f, err := os.Open(inputPath)
24 | 	if err != nil {
25 | 		return err
26 | 	}
27 | 	defer f.Close()
28 | 
29 | 	stationStats := make(map[string]*stats)
30 | 
31 | 	scanner := bufio.NewScanner(f)
32 | 	for scanner.Scan() {
33 | 		line := scanner.Text()
34 | 		station, tempStr, hasSemi := strings.Cut(line, ";")
35 | 		if !hasSemi {
36 | 			continue
37 | 		}
38 | 
39 | 		temp, err := strconv.ParseFloat(tempStr, 64)
40 | 		if err != nil {
41 | 			return err
42 | 		}
43 | 
44 | 		s := stationStats[station]
45 | 		if s == nil {
46 | 			stationStats[station] = &stats{
47 | 				min:   temp,
48 | 				max:   temp,
49 | 				sum:   temp,
50 | 				count: 1,
51 | 			}
52 | 		} else {
53 | 			s.min = min(s.min, temp)
54 | 			s.max = max(s.max, temp)
55 | 			s.sum += temp
56 | 			s.count++
57 | 		}
58 | 	}
59 | 
60 | 	stations := make([]string, 0, len(stationStats))
61 | 	for station := range stationStats {
62 | 		stations = append(stations, station)
63 | 	}
64 | 	sort.Strings(stations)
65 | 
66 | 	fmt.Fprint(output, "{")
67 | 	for i, station := range stations {
68 | 		if i > 0 {
69 | 			fmt.Fprint(output, ", ")
70 | 		}
71 | 		s := stationStats[station]
72 | 		mean := s.sum / float64(s.count)
73 | 		fmt.Fprintf(output, "%s=%.1f/%.1f/%.1f", station, s.min, mean, s.max)
74 | 	}
75 | 	fmt.Fprint(output, "}\n")
76 | 	return nil
77 | }
78 | 


--------------------------------------------------------------------------------
/r3.go:
--------------------------------------------------------------------------------
 1 | // r3: parse temperatures manually instead of using strconv.ParseFloat
 2 | //
 3 | // ~517ms for 10M rows (1.94x as fast as r1)
 4 | 
 5 | package main
 6 | 
 7 | import (
 8 | 	"bufio"
 9 | 	"bytes"
10 | 	"fmt"
11 | 	"io"
12 | 	"os"
13 | 	"sort"
14 | )
15 | 
16 | func r3(inputPath string, output io.Writer) error {
17 | 	type stats struct {
18 | 		min, max, sum float64
19 | 		count         int64
20 | 	}
21 | 
22 | 	f, err := os.Open(inputPath)
23 | 	if err != nil {
24 | 		return err
25 | 	}
26 | 	defer f.Close()
27 | 
28 | 	stationStats := make(map[string]*stats)
29 | 
30 | 	scanner := bufio.NewScanner(f)
31 | 	for scanner.Scan() {
32 | 		line := scanner.Bytes()
33 | 		station, tempBytes, hasSemi := bytes.Cut(line, []byte(";"))
34 | 		if !hasSemi {
35 | 			continue
36 | 		}
37 | 
38 | 		negative := false
39 | 		index := 0
40 | 		if tempBytes[index] == '-' {
41 | 			index++
42 | 			negative = true
43 | 		}
44 | 		temp := float64(tempBytes[index] - '0')
45 | 		index++
46 | 		if tempBytes[index] != '.' {
47 | 			temp = temp*10 + float64(tempBytes[index]-'0')
48 | 			index++
49 | 		}
50 | 		index++ // skip '.'
51 | 		temp += float64(tempBytes[index]-'0') / 10
52 | 		if negative {
53 | 			temp = -temp
54 | 		}
55 | 
56 | 		s := stationStats[string(station)]
57 | 		if s == nil {
58 | 			stationStats[string(station)] = &stats{
59 | 				min:   temp,
60 | 				max:   temp,
61 | 				sum:   temp,
62 | 				count: 1,
63 | 			}
64 | 		} else {
65 | 			s.min = min(s.min, temp)
66 | 			s.max = max(s.max, temp)
67 | 			s.sum += temp
68 | 			s.count++
69 | 		}
70 | 	}
71 | 
72 | 	stations := make([]string, 0, len(stationStats))
73 | 	for station := range stationStats {
74 | 		stations = append(stations, station)
75 | 	}
76 | 	sort.Strings(stations)
77 | 
78 | 	fmt.Fprint(output, "{")
79 | 	for i, station := range stations {
80 | 		if i > 0 {
81 | 			fmt.Fprint(output, ", ")
82 | 		}
83 | 		s := stationStats[station]
84 | 		mean := s.sum / float64(s.count)
85 | 		fmt.Fprintf(output, "%s=%.1f/%.1f/%.1f", station, s.min, mean, s.max)
86 | 	}
87 | 	fmt.Fprint(output, "}\n")
88 | 	return nil
89 | }
90 | 


--------------------------------------------------------------------------------
/r4.go:
--------------------------------------------------------------------------------
 1 | // r4: use fixed point int32s (*10) instead of float64s
 2 | //
 3 | // ~491ms for 10M rows (2.04x as fast as r1)
 4 | 
 5 | package main
 6 | 
 7 | import (
 8 | 	"bufio"
 9 | 	"bytes"
10 | 	"fmt"
11 | 	"io"
12 | 	"os"
13 | 	"sort"
14 | )
15 | 
16 | func r4(inputPath string, output io.Writer) error {
17 | 	type stats struct {
18 | 		min, max, count int32
19 | 		sum             int64
20 | 	}
21 | 
22 | 	f, err := os.Open(inputPath)
23 | 	if err != nil {
24 | 		return err
25 | 	}
26 | 	defer f.Close()
27 | 
28 | 	stationStats := make(map[string]*stats)
29 | 
30 | 	scanner := bufio.NewScanner(f)
31 | 	for scanner.Scan() {
32 | 		line := scanner.Bytes()
33 | 		station, tempBytes, hasSemi := bytes.Cut(line, []byte(";"))
34 | 		if !hasSemi {
35 | 			continue
36 | 		}
37 | 
38 | 		negative := false
39 | 		index := 0
40 | 		if tempBytes[index] == '-' {
41 | 			index++
42 | 			negative = true
43 | 		}
44 | 		temp := int32(tempBytes[index] - '0')
45 | 		index++
46 | 		if tempBytes[index] != '.' {
47 | 			temp = temp*10 + int32(tempBytes[index]-'0')
48 | 			index++
49 | 		}
50 | 		index++ // skip '.'
51 | 		temp = temp*10 + int32(tempBytes[index]-'0')
52 | 		if negative {
53 | 			temp = -temp
54 | 		}
55 | 
56 | 		s := stationStats[string(station)]
57 | 		if s == nil {
58 | 			stationStats[string(station)] = &stats{
59 | 				min:   temp,
60 | 				max:   temp,
61 | 				sum:   int64(temp),
62 | 				count: 1,
63 | 			}
64 | 		} else {
65 | 			s.min = min(s.min, temp)
66 | 			s.max = max(s.max, temp)
67 | 			s.sum += int64(temp)
68 | 			s.count++
69 | 		}
70 | 	}
71 | 
72 | 	stations := make([]string, 0, len(stationStats))
73 | 	for station := range stationStats {
74 | 		stations = append(stations, station)
75 | 	}
76 | 	sort.Strings(stations)
77 | 
78 | 	fmt.Fprint(output, "{")
79 | 	for i, station := range stations {
80 | 		if i > 0 {
81 | 			fmt.Fprint(output, ", ")
82 | 		}
83 | 		s := stationStats[station]
84 | 		mean := float64(s.sum) / float64(s.count) / 10
85 | 		fmt.Fprintf(output, "%s=%.1f/%.1f/%.1f", station, float64(s.min)/10, mean, float64(s.max)/10)
86 | 	}
87 | 	fmt.Fprint(output, "}\n")
88 | 	return nil
89 | }
90 | 


--------------------------------------------------------------------------------
/r5.go:
--------------------------------------------------------------------------------
 1 | // r5: avoid bytes.Cut
 2 | //
 3 | // ~442ms for 10M rows (2.27x as fast as r1)
 4 | 
 5 | package main
 6 | 
 7 | import (
 8 | 	"bufio"
 9 | 	"fmt"
10 | 	"io"
11 | 	"os"
12 | 	"sort"
13 | )
14 | 
15 | func r5(inputPath string, output io.Writer) error {
16 | 	type stats struct {
17 | 		min, max, count int32
18 | 		sum             int64
19 | 	}
20 | 
21 | 	f, err := os.Open(inputPath)
22 | 	if err != nil {
23 | 		return err
24 | 	}
25 | 	defer f.Close()
26 | 
27 | 	stationStats := make(map[string]*stats)
28 | 
29 | 	scanner := bufio.NewScanner(f)
30 | 	for scanner.Scan() {
31 | 		line := scanner.Bytes()
32 | 
33 | 		end := len(line)
34 | 		tenths := int32(line[end-1] - '0')
35 | 		ones := int32(line[end-3] - '0') // line[end-2] is '.'
36 | 		var temp int32
37 | 		var semicolon int
38 | 		if line[end-4] == ';' {
39 | 			temp = ones*10 + tenths
40 | 			semicolon = end - 4
41 | 		} else if line[end-4] == '-' {
42 | 			temp = -(ones*10 + tenths)
43 | 			semicolon = end - 5
44 | 		} else {
45 | 			tens := int32(line[end-4] - '0')
46 | 			if line[end-5] == ';' {
47 | 				temp = tens*100 + ones*10 + tenths
48 | 				semicolon = end - 5
49 | 			} else { // '-'
50 | 				temp = -(tens*100 + ones*10 + tenths)
51 | 				semicolon = end - 6
52 | 			}
53 | 		}
54 | 
55 | 		station := line[:semicolon]
56 | 		s := stationStats[string(station)]
57 | 		if s == nil {
58 | 			stationStats[string(station)] = &stats{
59 | 				min:   temp,
60 | 				max:   temp,
61 | 				sum:   int64(temp),
62 | 				count: 1,
63 | 			}
64 | 		} else {
65 | 			s.min = min(s.min, temp)
66 | 			s.max = max(s.max, temp)
67 | 			s.sum += int64(temp)
68 | 			s.count++
69 | 		}
70 | 	}
71 | 
72 | 	stations := make([]string, 0, len(stationStats))
73 | 	for station := range stationStats {
74 | 		stations = append(stations, station)
75 | 	}
76 | 	sort.Strings(stations)
77 | 
78 | 	fmt.Fprint(output, "{")
79 | 	for i, station := range stations {
80 | 		if i > 0 {
81 | 			fmt.Fprint(output, ", ")
82 | 		}
83 | 		s := stationStats[station]
84 | 		mean := float64(s.sum) / float64(s.count) / 10
85 | 		fmt.Fprintf(output, "%s=%.1f/%.1f/%.1f", station, float64(s.min)/10, mean, float64(s.max)/10)
86 | 	}
87 | 	fmt.Fprint(output, "}\n")
88 | 	return nil
89 | }
90 | 


--------------------------------------------------------------------------------
/r6.go:
--------------------------------------------------------------------------------
  1 | // r6: don't use bufio.Scanner to avoid scanning some bytes twice
  2 | //
  3 | // ~399ms for 10M rows (2.52x as fast as r1)
  4 | 
  5 | package main
  6 | 
  7 | import (
  8 | 	"bytes"
  9 | 	"fmt"
 10 | 	"io"
 11 | 	"os"
 12 | 	"sort"
 13 | )
 14 | 
 15 | func r6(inputPath string, output io.Writer) error {
 16 | 	type stats struct {
 17 | 		min, max, count int32
 18 | 		sum             int64
 19 | 	}
 20 | 
 21 | 	f, err := os.Open(inputPath)
 22 | 	if err != nil {
 23 | 		return err
 24 | 	}
 25 | 	defer f.Close()
 26 | 
 27 | 	stationStats := make(map[string]*stats)
 28 | 
 29 | 	buf := make([]byte, 1024*1024)
 30 | 	readStart := 0
 31 | 	for {
 32 | 		n, err := f.Read(buf[readStart:])
 33 | 		if err != nil && err != io.EOF {
 34 | 			return err
 35 | 		}
 36 | 		if readStart+n == 0 {
 37 | 			break
 38 | 		}
 39 | 		chunk := buf[:readStart+n]
 40 | 
 41 | 		newline := bytes.LastIndexByte(chunk, '\n')
 42 | 		if newline < 0 {
 43 | 			break
 44 | 		}
 45 | 		remaining := chunk[newline+1:]
 46 | 		chunk = chunk[:newline+1]
 47 | 
 48 | 		for {
 49 | 			station, after, hasSemi := bytes.Cut(chunk, []byte(";"))
 50 | 			if !hasSemi {
 51 | 				break
 52 | 			}
 53 | 
 54 | 			index := 0
 55 | 			negative := false
 56 | 			if after[index] == '-' {
 57 | 				negative = true
 58 | 				index++
 59 | 			}
 60 | 			temp := int32(after[index] - '0')
 61 | 			index++
 62 | 			if after[index] != '.' {
 63 | 				temp = temp*10 + int32(after[index]-'0')
 64 | 				index++
 65 | 			}
 66 | 			index++ // skip '.'
 67 | 			temp = temp*10 + int32(after[index]-'0')
 68 | 			index += 2 // skip last digit and '\n'
 69 | 			if negative {
 70 | 				temp = -temp
 71 | 			}
 72 | 			chunk = after[index:]
 73 | 
 74 | 			s := stationStats[string(station)]
 75 | 			if s == nil {
 76 | 				stationStats[string(station)] = &stats{
 77 | 					min:   temp,
 78 | 					max:   temp,
 79 | 					sum:   int64(temp),
 80 | 					count: 1,
 81 | 				}
 82 | 			} else {
 83 | 				s.min = min(s.min, temp)
 84 | 				s.max = max(s.max, temp)
 85 | 				s.sum += int64(temp)
 86 | 				s.count++
 87 | 			}
 88 | 		}
 89 | 
 90 | 		readStart = copy(buf, remaining)
 91 | 	}
 92 | 
 93 | 	stations := make([]string, 0, len(stationStats))
 94 | 	for station := range stationStats {
 95 | 		stations = append(stations, station)
 96 | 	}
 97 | 	sort.Strings(stations)
 98 | 
 99 | 	fmt.Fprint(output, "{")
100 | 	for i, station := range stations {
101 | 		if i > 0 {
102 | 			fmt.Fprint(output, ", ")
103 | 		}
104 | 		s := stationStats[station]
105 | 		mean := float64(s.sum) / float64(s.count) / 10
106 | 		fmt.Fprintf(output, "%s=%.1f/%.1f/%.1f", station, float64(s.min)/10, mean, float64(s.max)/10)
107 | 	}
108 | 	fmt.Fprint(output, "}\n")
109 | 	return nil
110 | }
111 | 


--------------------------------------------------------------------------------
/r7.go:
--------------------------------------------------------------------------------
  1 | // r7: use custom hash table and hash station name as we look for ';'
  2 | //
  3 | // ~234ms for 10M rows (4.29x as fast as r1)
  4 | 
  5 | package main
  6 | 
  7 | import (
  8 | 	"bytes"
  9 | 	"fmt"
 10 | 	"io"
 11 | 	"os"
 12 | 	"sort"
 13 | )
 14 | 
 15 | func r7(inputPath string, output io.Writer) error {
 16 | 	type stats struct {
 17 | 		min, max, count int32
 18 | 		sum             int64
 19 | 	}
 20 | 
 21 | 	f, err := os.Open(inputPath)
 22 | 	if err != nil {
 23 | 		return err
 24 | 	}
 25 | 	defer f.Close()
 26 | 
 27 | 	type item struct {
 28 | 		key  []byte
 29 | 		stat *stats
 30 | 	}
 31 | 	const numBuckets = 1 << 17        // number of hash buckets (power of 2)
 32 | 	items := make([]item, numBuckets) // hash buckets, linearly probed
 33 | 	size := 0                         // number of active items in items slice
 34 | 
 35 | 	buf := make([]byte, 1024*1024)
 36 | 	readStart := 0
 37 | 	for {
 38 | 		n, err := f.Read(buf[readStart:])
 39 | 		if err != nil && err != io.EOF {
 40 | 			return err
 41 | 		}
 42 | 		if readStart+n == 0 {
 43 | 			break
 44 | 		}
 45 | 		chunk := buf[:readStart+n]
 46 | 
 47 | 		newline := bytes.LastIndexByte(chunk, '\n')
 48 | 		if newline < 0 {
 49 | 			break
 50 | 		}
 51 | 		remaining := chunk[newline+1:]
 52 | 		chunk = chunk[:newline+1]
 53 | 
 54 | 		for {
 55 | 			const (
 56 | 				// FNV-1 64-bit constants from hash/fnv.
 57 | 				offset64 = 14695981039346656037
 58 | 				prime64  = 1099511628211
 59 | 			)
 60 | 
 61 | 			var station, after []byte
 62 | 			hash := uint64(offset64)
 63 | 			i := 0
 64 | 			for ; i < len(chunk); i++ {
 65 | 				c := chunk[i]
 66 | 				if c == ';' {
 67 | 					station = chunk[:i]
 68 | 					after = chunk[i+1:]
 69 | 					break
 70 | 				}
 71 | 				hash ^= uint64(c) // FNV-1a is XOR then *
 72 | 				hash *= prime64
 73 | 			}
 74 | 			if i == len(chunk) {
 75 | 				break
 76 | 			}
 77 | 
 78 | 			index := 0
 79 | 			negative := false
 80 | 			if after[index] == '-' {
 81 | 				negative = true
 82 | 				index++
 83 | 			}
 84 | 			temp := int32(after[index] - '0')
 85 | 			index++
 86 | 			if after[index] != '.' {
 87 | 				temp = temp*10 + int32(after[index]-'0')
 88 | 				index++
 89 | 			}
 90 | 			index++ // skip '.'
 91 | 			temp = temp*10 + int32(after[index]-'0')
 92 | 			index += 2 // skip last digit and '\n'
 93 | 			if negative {
 94 | 				temp = -temp
 95 | 			}
 96 | 			chunk = after[index:]
 97 | 
 98 | 			hashIndex := int(hash & uint64(numBuckets-1))
 99 | 			for {
100 | 				if items[hashIndex].key == nil {
101 | 					// Found empty slot, add new item (copying key).
102 | 					key := make([]byte, len(station))
103 | 					copy(key, station)
104 | 					items[hashIndex] = item{
105 | 						key: key,
106 | 						stat: &stats{
107 | 							min:   temp,
108 | 							max:   temp,
109 | 							sum:   int64(temp),
110 | 							count: 1,
111 | 						},
112 | 					}
113 | 					size++
114 | 					if size > numBuckets/2 {
115 | 						panic("too many items in hash table")
116 | 					}
117 | 					break
118 | 				}
119 | 				if bytes.Equal(items[hashIndex].key, station) {
120 | 					// Found matching slot, add to existing stats.
121 | 					s := items[hashIndex].stat
122 | 					s.min = min(s.min, temp)
123 | 					s.max = max(s.max, temp)
124 | 					s.sum += int64(temp)
125 | 					s.count++
126 | 					break
127 | 				}
128 | 				// Slot already holds another key, try next slot (linear probe).
129 | 				hashIndex++
130 | 				if hashIndex >= numBuckets {
131 | 					hashIndex = 0
132 | 				}
133 | 			}
134 | 		}
135 | 
136 | 		readStart = copy(buf, remaining)
137 | 	}
138 | 
139 | 	stationItems := make([]item, 0, size)
140 | 	for _, item := range items {
141 | 		if item.key == nil {
142 | 			continue
143 | 		}
144 | 		stationItems = append(stationItems, item)
145 | 	}
146 | 	sort.Slice(stationItems, func(i, j int) bool {
147 | 		return string(stationItems[i].key) < string(stationItems[j].key)
148 | 	})
149 | 
150 | 	fmt.Fprint(output, "{")
151 | 	for i, item := range stationItems {
152 | 		if i > 0 {
153 | 			fmt.Fprint(output, ", ")
154 | 		}
155 | 		s := item.stat
156 | 		mean := float64(s.sum) / float64(s.count) / 10
157 | 		fmt.Fprintf(output, "%s=%.1f/%.1f/%.1f", item.key, float64(s.min)/10, mean, float64(s.max)/10)
158 | 	}
159 | 	fmt.Fprint(output, "}\n")
160 | 	return nil
161 | }
162 | 


--------------------------------------------------------------------------------
/r8.go:
--------------------------------------------------------------------------------
  1 | // r8: add some parallelism (but back to non-optimized r1 version)
  2 | //
  3 | // ~213ms for 10M rows (4.71x as fast as r1)
  4 | 
  5 | package main
  6 | 
  7 | import (
  8 | 	"bufio"
  9 | 	"bytes"
 10 | 	"fmt"
 11 | 	"io"
 12 | 	"os"
 13 | 	"sort"
 14 | 	"strconv"
 15 | 	"strings"
 16 | )
 17 | 
 18 | type r8Stats struct {
 19 | 	min, max, sum float64
 20 | 	count         int64
 21 | }
 22 | 
 23 | func r8(inputPath string, output io.Writer) error {
 24 | 	parts, err := splitFile(inputPath, maxGoroutines)
 25 | 	if err != nil {
 26 | 		return err
 27 | 	}
 28 | 
 29 | 	resultsCh := make(chan map[string]r8Stats)
 30 | 	for _, part := range parts {
 31 | 		go r8ProcessPart(inputPath, part.offset, part.size, resultsCh)
 32 | 	}
 33 | 
 34 | 	totals := make(map[string]r8Stats)
 35 | 	for i := 0; i < len(parts); i++ {
 36 | 		result := <-resultsCh
 37 | 		for station, s := range result {
 38 | 			ts, ok := totals[station]
 39 | 			if !ok {
 40 | 				totals[station] = r8Stats{
 41 | 					min:   s.min,
 42 | 					max:   s.max,
 43 | 					sum:   s.sum,
 44 | 					count: s.count,
 45 | 				}
 46 | 				continue
 47 | 			}
 48 | 			ts.min = min(ts.min, s.min)
 49 | 			ts.max = max(ts.max, s.max)
 50 | 			ts.sum += s.sum
 51 | 			ts.count += s.count
 52 | 			totals[station] = ts
 53 | 		}
 54 | 	}
 55 | 
 56 | 	stations := make([]string, 0, len(totals))
 57 | 	for station := range totals {
 58 | 		stations = append(stations, station)
 59 | 	}
 60 | 	sort.Strings(stations)
 61 | 
 62 | 	fmt.Fprint(output, "{")
 63 | 	for i, station := range stations {
 64 | 		if i > 0 {
 65 | 			fmt.Fprint(output, ", ")
 66 | 		}
 67 | 		s := totals[station]
 68 | 		mean := s.sum / float64(s.count)
 69 | 		fmt.Fprintf(output, "%s=%.1f/%.1f/%.1f", station, s.min, mean, s.max)
 70 | 	}
 71 | 	fmt.Fprint(output, "}\n")
 72 | 	return nil
 73 | }
 74 | 
 75 | func r8ProcessPart(inputPath string, fileOffset, fileSize int64, resultsCh chan map[string]r8Stats) {
 76 | 	file, err := os.Open(inputPath)
 77 | 	if err != nil {
 78 | 		panic(err)
 79 | 	}
 80 | 	defer file.Close()
 81 | 	_, err = file.Seek(fileOffset, io.SeekStart)
 82 | 	if err != nil {
 83 | 		panic(err)
 84 | 	}
 85 | 	f := io.LimitedReader{R: file, N: fileSize}
 86 | 
 87 | 	stationStats := make(map[string]r8Stats)
 88 | 
 89 | 	scanner := bufio.NewScanner(&f)
 90 | 	for scanner.Scan() {
 91 | 		line := scanner.Text()
 92 | 		station, tempStr, hasSemi := strings.Cut(line, ";")
 93 | 		if !hasSemi {
 94 | 			continue
 95 | 		}
 96 | 
 97 | 		temp, err := strconv.ParseFloat(tempStr, 64)
 98 | 		if err != nil {
 99 | 			panic(err)
100 | 		}
101 | 
102 | 		s, ok := stationStats[station]
103 | 		if !ok {
104 | 			s.min = temp
105 | 			s.max = temp
106 | 			s.sum = temp
107 | 			s.count = 1
108 | 		} else {
109 | 			s.min = min(s.min, temp)
110 | 			s.max = max(s.max, temp)
111 | 			s.sum += temp
112 | 			s.count++
113 | 		}
114 | 		stationStats[station] = s
115 | 	}
116 | 
117 | 	resultsCh <- stationStats
118 | }
119 | 
120 | type part struct {
121 | 	offset, size int64
122 | }
123 | 
124 | func splitFile(inputPath string, numParts int) ([]part, error) {
125 | 	const maxLineLength = 100
126 | 
127 | 	f, err := os.Open(inputPath)
128 | 	if err != nil {
129 | 		return nil, err
130 | 	}
131 | 	st, err := f.Stat()
132 | 	if err != nil {
133 | 		return nil, err
134 | 	}
135 | 	size := st.Size()
136 | 	splitSize := size / int64(numParts)
137 | 
138 | 	buf := make([]byte, maxLineLength)
139 | 
140 | 	parts := make([]part, 0, numParts)
141 | 	offset := int64(0)
142 | 	for i := 0; i < numParts; i++ {
143 | 		if i == numParts-1 {
144 | 			if offset < size {
145 | 				parts = append(parts, part{offset, size - offset})
146 | 			}
147 | 			break
148 | 		}
149 | 
150 | 		seekOffset := max(offset+splitSize-maxLineLength, 0)
151 | 		_, err := f.Seek(seekOffset, io.SeekStart)
152 | 		if err != nil {
153 | 			return nil, err
154 | 		}
155 | 		n, _ := io.ReadFull(f, buf)
156 | 		chunk := buf[:n]
157 | 		newline := bytes.LastIndexByte(chunk, '\n')
158 | 		if newline < 0 {
159 | 			return nil, fmt.Errorf("newline not found at offset %d", offset+splitSize-maxLineLength)
160 | 		}
161 | 		remaining := len(chunk) - newline - 1
162 | 		nextOffset := seekOffset + int64(len(chunk)) - int64(remaining)
163 | 		parts = append(parts, part{offset, nextOffset - offset})
164 | 		offset = nextOffset
165 | 	}
166 | 	return parts, nil
167 | }
168 | 


--------------------------------------------------------------------------------
/r8_test.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"os"
 5 | 	"testing"
 6 | )
 7 | 
 8 | func TestSplitFileEntire(t *testing.T) {
 9 | 	const path = "testdata/split.txt"
10 | 
11 | 	parts, err := splitFile(path, 4)
12 | 	if err != nil {
13 | 		t.Fatalf("Failed to split %s: %v", path, err)
14 | 	}
15 | 
16 | 	st, err := os.Stat(path)
17 | 	if err != nil {
18 | 		t.Fatalf("Failed to stat %s: %v", path, err)
19 | 	}
20 | 	fileSize := st.Size()
21 | 
22 | 	partsSize := int64(0)
23 | 	for _, part := range parts {
24 | 		partsSize += part.size
25 | 	}
26 | 
27 | 	if partsSize != fileSize {
28 | 		t.Errorf("Want size %d, got %d", fileSize, partsSize)
29 | 	}
30 | }
31 | 


--------------------------------------------------------------------------------
/r9.go:
--------------------------------------------------------------------------------
  1 | // r9: all the previous optimizations plus parallel execution
  2 | //
  3 | // 43ms for 10M rows (23.3x as fast as r1)
  4 | 
  5 | package main
  6 | 
  7 | import (
  8 | 	"bytes"
  9 | 	"fmt"
 10 | 	"io"
 11 | 	"os"
 12 | 	"sort"
 13 | )
 14 | 
 15 | type r9Stats struct {
 16 | 	min, max, count int32
 17 | 	sum             int64
 18 | }
 19 | 
 20 | func r9(inputPath string, output io.Writer) error {
 21 | 	parts, err := splitFile(inputPath, maxGoroutines)
 22 | 	if err != nil {
 23 | 		return err
 24 | 	}
 25 | 
 26 | 	resultsCh := make(chan map[string]*r9Stats)
 27 | 	for _, part := range parts {
 28 | 		go r9ProcessPart(inputPath, part.offset, part.size, resultsCh)
 29 | 	}
 30 | 
 31 | 	totals := make(map[string]*r9Stats)
 32 | 	for i := 0; i < len(parts); i++ {
 33 | 		result := <-resultsCh
 34 | 		for station, s := range result {
 35 | 			ts := totals[station]
 36 | 			if ts == nil {
 37 | 				totals[station] = s
 38 | 				continue
 39 | 			}
 40 | 			ts.min = min(ts.min, s.min)
 41 | 			ts.max = max(ts.max, s.max)
 42 | 			ts.sum += s.sum
 43 | 			ts.count += s.count
 44 | 		}
 45 | 	}
 46 | 
 47 | 	stations := make([]string, 0, len(totals))
 48 | 	for station := range totals {
 49 | 		stations = append(stations, station)
 50 | 	}
 51 | 	sort.Strings(stations)
 52 | 
 53 | 	fmt.Fprint(output, "{")
 54 | 	for i, station := range stations {
 55 | 		if i > 0 {
 56 | 			fmt.Fprint(output, ", ")
 57 | 		}
 58 | 		s := totals[station]
 59 | 		mean := float64(s.sum) / float64(s.count) / 10
 60 | 		fmt.Fprintf(output, "%s=%.1f/%.1f/%.1f", station, float64(s.min)/10, mean, float64(s.max)/10)
 61 | 	}
 62 | 	fmt.Fprint(output, "}\n")
 63 | 	return nil
 64 | }
 65 | 
 66 | func r9ProcessPart(inputPath string, fileOffset, fileSize int64, resultsCh chan map[string]*r9Stats) {
 67 | 	file, err := os.Open(inputPath)
 68 | 	if err != nil {
 69 | 		panic(err)
 70 | 	}
 71 | 	defer file.Close()
 72 | 	_, err = file.Seek(fileOffset, io.SeekStart)
 73 | 	if err != nil {
 74 | 		panic(err)
 75 | 	}
 76 | 	f := io.LimitedReader{R: file, N: fileSize}
 77 | 
 78 | 	type item struct {
 79 | 		key  []byte
 80 | 		stat *r9Stats
 81 | 	}
 82 | 	const numBuckets = 1 << 17        // number of hash buckets (power of 2)
 83 | 	items := make([]item, numBuckets) // hash buckets, linearly probed
 84 | 	size := 0                         // number of active items in items slice
 85 | 
 86 | 	buf := make([]byte, 1024*1024)
 87 | 	readStart := 0
 88 | 	for {
 89 | 		n, err := f.Read(buf[readStart:])
 90 | 		if err != nil && err != io.EOF {
 91 | 			panic(err)
 92 | 		}
 93 | 		if readStart+n == 0 {
 94 | 			break
 95 | 		}
 96 | 		chunk := buf[:readStart+n]
 97 | 
 98 | 		newline := bytes.LastIndexByte(chunk, '\n')
 99 | 		if newline < 0 {
100 | 			break
101 | 		}
102 | 		remaining := chunk[newline+1:]
103 | 		chunk = chunk[:newline+1]
104 | 
105 | 		for {
106 | 			const (
107 | 				// FNV-1 64-bit constants from hash/fnv.
108 | 				offset64 = 14695981039346656037
109 | 				prime64  = 1099511628211
110 | 			)
111 | 
112 | 			var station, after []byte
113 | 			hash := uint64(offset64)
114 | 			i := 0
115 | 			for ; i < len(chunk); i++ {
116 | 				c := chunk[i]
117 | 				if c == ';' {
118 | 					station = chunk[:i]
119 | 					after = chunk[i+1:]
120 | 					break
121 | 				}
122 | 				hash ^= uint64(c) // FNV-1a is XOR then *
123 | 				hash *= prime64
124 | 			}
125 | 			if i == len(chunk) {
126 | 				break
127 | 			}
128 | 
129 | 			index := 0
130 | 			negative := false
131 | 			if after[index] == '-' {
132 | 				negative = true
133 | 				index++
134 | 			}
135 | 			temp := int32(after[index] - '0')
136 | 			index++
137 | 			if after[index] != '.' {
138 | 				temp = temp*10 + int32(after[index]-'0')
139 | 				index++
140 | 			}
141 | 			index++ // skip '.'
142 | 			temp = temp*10 + int32(after[index]-'0')
143 | 			index += 2 // skip last digit and '\n'
144 | 			if negative {
145 | 				temp = -temp
146 | 			}
147 | 			chunk = after[index:]
148 | 
149 | 			hashIndex := int(hash & (numBuckets - 1))
150 | 			for {
151 | 				if items[hashIndex].key == nil {
152 | 					// Found empty slot, add new item (copying key).
153 | 					key := make([]byte, len(station))
154 | 					copy(key, station)
155 | 					items[hashIndex] = item{
156 | 						key: key,
157 | 						stat: &r9Stats{
158 | 							min:   temp,
159 | 							max:   temp,
160 | 							sum:   int64(temp),
161 | 							count: 1,
162 | 						},
163 | 					}
164 | 					size++
165 | 					if size > numBuckets/2 {
166 | 						panic("too many items in hash table")
167 | 					}
168 | 					break
169 | 				}
170 | 				if bytes.Equal(items[hashIndex].key, station) {
171 | 					// Found matching slot, add to existing stats.
172 | 					s := items[hashIndex].stat
173 | 					s.min = min(s.min, temp)
174 | 					s.max = max(s.max, temp)
175 | 					s.sum += int64(temp)
176 | 					s.count++
177 | 					break
178 | 				}
179 | 				// Slot already holds another key, try next slot (linear probe).
180 | 				hashIndex++
181 | 				if hashIndex >= numBuckets {
182 | 					hashIndex = 0
183 | 				}
184 | 			}
185 | 		}
186 | 
187 | 		readStart = copy(buf, remaining)
188 | 	}
189 | 
190 | 	result := make(map[string]*r9Stats, size)
191 | 	for _, item := range items {
192 | 		if item.key == nil {
193 | 			continue
194 | 		}
195 | 		result[string(item.key)] = item.stat
196 | 	}
197 | 	resultsCh <- result
198 | }
199 | 


--------------------------------------------------------------------------------
/testdata/split.txt:
--------------------------------------------------------------------------------
 1 | Belas;48.7
 2 | Hulin;-95.1
 3 | Neuwied;50.2
 4 | Bellinzona;10.3
 5 | Volgorechensk;-52.6
 6 | Narón;15.0
 7 | Sibilia;-63.3
 8 | Bedum;9.0
 9 | Qyzylorda;-10.6
10 | Muzambinho;67.7
11 | Ząbki;-26.8
12 | Madnūr;-37.5
13 | Kornwestheim;13.4
14 | Bom Jesus dos Perdões;55.0
15 | Benjamin Constant;26.5
16 | Tiaong;-98.7
17 | North Whitehall;3.7
18 | La Flèche;74.7
19 | Mezőkövesd;-75.3
20 | Mahazony;-38.1
21 | Kawachinagano;-76.4
22 | Lamarão;59.9
23 | Neiva;-81.8
24 | White City;28.7
25 | Detva;73.1
26 | Toffo;25.8
27 | Kamenka;-40.8
28 | Oulad Friha;94.2
29 | Jishi;8.6
30 | Manzanares el Real;61.2
31 | Tom Price;26.2
32 | Lusanga;86.8
33 | Ban Bo Luang;-47.6
34 | Ansonia;1.1
35 | Scherpenzeel;-79.6
36 | 


--------------------------------------------------------------------------------