├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── go.mod └── main.go /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency directories (remove the comment below to include it) 18 | # vendor/ 19 | 20 | # Go workspace file 21 | go.work 22 | 23 | *.txt 24 | *.bin -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Karan Sharma 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Basic Makefile for a Go project 2 | 3 | # Binary name 4 | BINARY_NAME=./bin/1brc.bin 5 | 6 | # Build the project 7 | all: fresh 8 | 9 | # Fresh command (clean, build, and run) 10 | fresh: clean build run 11 | 12 | # Build command 13 | build: 14 | CGO_ENABLED=0 go build -o $(BINARY_NAME) -ldflags="-s -w" -v ./ 15 | 16 | # Run the project 17 | run: 18 | $(BINARY_NAME) --file=measurements.txt 19 | 20 | # Clean build files 21 | clean: 22 | go clean 23 | rm -f $(BINARY_NAME) 24 | 25 | # Make sure these targets are executed as commands 26 | .PHONY: all build run clean 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # One Billion Row Challenge 2 | 3 | A Go implementation of the [One Billion Row Challenge](https://github.com/gunnarmorling/1brc) 4 | 5 | Read the accompanying [blog post](https://mrkaran.dev/posts/1brc/) for more details. 6 | 7 | ## Highlights 8 | 9 | - Reads the file in chunks to efficiently reduce I/O overhead. 10 | - Spawns N workers for N cores for processing chunks. 11 | - Memory allocation tweaks: reuse byte buffers, avoid `strings.Split` and its extra allocations, etc. 12 | - Separate worker for aggregating results. 13 | 14 | ## Prerequisites 15 | 16 | To generate the text file for these measurements, follow the steps outlined [here](https://github.com/gunnarmorling/1brc?tab=readme-ov-file#prerequisites).
17 | 18 | After running the commands, I have a `measurements.txt` on my file system: 19 | 20 | Example output after running the commands: 21 | 22 | ```sh 23 | ➜ 1brc-go git:(main) du -sh measurements.txt 24 | 13G measurements.txt 25 | ➜ 1brc-go git:(main) tail measurements.txt 26 | Mek'ele;13.3 27 | Kampala;50.8 28 | Dikson;-3.7 29 | Dodoma;20.3 30 | San Diego;7.1 31 | Chihuahua;20.3 32 | Ngaoundéré;24.2 33 | Toronto;12.7 34 | Wrocław;12.6 35 | Singapore;14.4 36 | ``` 37 | 38 | ## Run the challenge 39 | 40 | ```sh 41 | make run 42 | ``` 43 | 44 | ## Results 45 | 46 | Running the code on my laptop, which is Apple M2 Pro with 10‑core CPU, 32GB memory. 47 | 48 | | Chunk Size | Time | 49 | | ---------- | ------- | 50 | | 512.00 KB | 23.756s | 51 | | 1.00 MB | 21.798s | 52 | | 32.00 MB | 19.501s | 53 | | 16.00 MB | 20.693s | 54 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/mr-karan/1brc-go 2 | 3 | go 1.21.5 4 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "flag" 6 | "fmt" 7 | "os" 8 | "runtime" 9 | "sort" 10 | "strconv" 11 | "strings" 12 | "sync" 13 | ) 14 | 15 | var ( 16 | numWorkers int 17 | measurementsFilePath string 18 | chunkSize int64 19 | ) 20 | 21 | type Stats struct { 22 | Min, Mean, Max float64 23 | Count int 24 | } 25 | 26 | func init() { 27 | flag.StringVar(&measurementsFilePath, "file", "", "Path to the measurements file") 28 | flag.Int64Var(&chunkSize, "chunksize", 512*1024, "Size of each file chunk in bytes") 29 | flag.Parse() 30 | 31 | if measurementsFilePath == "" { 32 | fmt.Println("Error: Measurements file path is required") 33 | os.Exit(1) 34 | } 35 | 36 | numWorkers = runtime.NumCPU() 37 | runtime.GOMAXPROCS(numWorkers) 38 | } 
39 | 40 | func main() { 41 | file, err := os.Open(measurementsFilePath) 42 | if err != nil { 43 | panic(err) 44 | } 45 | defer file.Close() 46 | 47 | resultsChan := make(chan map[string]Stats, numWorkers) 48 | var wg sync.WaitGroup 49 | var aggWg sync.WaitGroup 50 | 51 | aggWg.Add(1) 52 | finalResults := make(map[string]Stats) 53 | 54 | // Start a separate goroutine for aggregation 55 | go func() { 56 | defer aggWg.Done() 57 | for workerResult := range resultsChan { 58 | for station, stats := range workerResult { 59 | finalStats, exists := finalResults[station] 60 | if !exists { 61 | finalResults[station] = stats 62 | continue 63 | } 64 | finalStats.Min = min(finalStats.Min, stats.Min) 65 | finalStats.Max = max(finalStats.Max, stats.Max) 66 | totalCount := finalStats.Count + stats.Count 67 | finalStats.Mean = (finalStats.Mean*float64(finalStats.Count) + stats.Mean*float64(stats.Count)) / float64(totalCount) 68 | finalStats.Count = totalCount 69 | finalResults[station] = finalStats 70 | } 71 | } 72 | }() 73 | 74 | buf := make([]byte, chunkSize) 75 | leftover := make([]byte, 0, chunkSize) 76 | 77 | go func() { 78 | for { 79 | bytesRead, err := file.Read(buf) 80 | if bytesRead > 0 { 81 | chunk := make([]byte, bytesRead) 82 | copy(chunk, buf[:bytesRead]) 83 | validChunk, newLeftover := processChunk(chunk, leftover) 84 | leftover = newLeftover 85 | if len(validChunk) > 0 { 86 | wg.Add(1) 87 | go processChunkData(validChunk, resultsChan, &wg) 88 | } 89 | } 90 | if err != nil { 91 | break 92 | } 93 | } 94 | wg.Wait() 95 | close(resultsChan) 96 | }() 97 | 98 | aggWg.Wait() 99 | 100 | // Print results 101 | printStats(finalResults) 102 | } 103 | 104 | func processChunk(chunk, leftover []byte) (validChunk, newLeftover []byte) { 105 | firstNewline := -1 106 | lastNewline := -1 107 | for i, b := range chunk { 108 | if b == '\n' { 109 | if firstNewline == -1 { 110 | firstNewline = i 111 | } 112 | lastNewline = i 113 | } 114 | } 115 | if firstNewline != -1 { 116 | validChunk = 
append(leftover, chunk[:lastNewline+1]...) 117 | newLeftover = make([]byte, len(chunk[lastNewline+1:])) 118 | copy(newLeftover, chunk[lastNewline+1:]) 119 | } else { 120 | newLeftover = append(leftover, chunk...) 121 | } 122 | return validChunk, newLeftover 123 | } 124 | 125 | func processChunkData(chunk []byte, resultsChan chan<- map[string]Stats, wg *sync.WaitGroup) { 126 | defer wg.Done() 127 | 128 | stationStats := make(map[string]Stats) 129 | scanner := bufio.NewScanner(strings.NewReader(string(chunk))) 130 | 131 | for scanner.Scan() { 132 | line := scanner.Text() 133 | 134 | // Find the index of the delimiter 135 | delimiterIndex := strings.Index(line, ";") 136 | if delimiterIndex == -1 { 137 | continue // Delimiter not found, skip this line 138 | } 139 | 140 | // Extract the station name and temperature string 141 | station := line[:delimiterIndex] 142 | tempStr := line[delimiterIndex+1:] 143 | 144 | // Convert the temperature string to a float 145 | temp, err := strconv.ParseFloat(tempStr, 64) 146 | if err != nil { 147 | continue // Invalid temperature value, skip this line 148 | } 149 | 150 | // Update the statistics for the station 151 | stats, exists := stationStats[station] 152 | if !exists { 153 | stats = Stats{Min: temp, Max: temp} 154 | } 155 | stats.Count++ 156 | stats.Min = min(stats.Min, temp) 157 | stats.Max = max(stats.Max, temp) 158 | stats.Mean += (temp - stats.Mean) / float64(stats.Count) 159 | stationStats[station] = stats 160 | } 161 | 162 | // Send the computed stats to resultsChan 163 | resultsChan <- stationStats 164 | } 165 | 166 | func min(a, b float64) float64 { 167 | if a == 0 || a > b { 168 | return b 169 | } 170 | return a 171 | } 172 | 173 | func max(a, b float64) float64 { 174 | if a < b { 175 | return b 176 | } 177 | return a 178 | } 179 | 180 | func printStats(statsMap map[string]Stats) { 181 | var stations []string 182 | for station := range statsMap { 183 | stations = append(stations, station) 184 | } 185 | 
sort.Strings(stations) 186 | 187 | fmt.Print("{") 188 | for i, station := range stations { 189 | stats := statsMap[station] 190 | fmt.Printf("%s=%.1f/%.1f/%.1f", station, stats.Min, stats.Mean, stats.Max) 191 | if i < len(stations)-1 { 192 | fmt.Print(", ") 193 | } 194 | } 195 | fmt.Println("}") 196 | } 197 | --------------------------------------------------------------------------------