├── .gitignore
├── Cargo.toml
├── .idea
├── .gitignore
├── vcs.xml
├── modules.xml
└── onebrc.iml
├── gen
├── Cargo.toml
└── src
│ ├── main.rs
│ └── lib.rs
├── onebrc
├── Cargo.toml
├── Cargo.lock
└── src
│ ├── main.rs
│ └── lib.rs
├── README.md
└── Cargo.lock
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | measurements.txt
3 |
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [workspace]
2 | resolver = "2"
3 |
4 | members = [
5 | "onebrc",
6 | "gen"
7 | ]
8 |
9 |
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Editor-based HTTP Client requests
5 | /httpRequests/
6 |
--------------------------------------------------------------------------------
/gen/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "gen"
3 | version = "0.1.0"
4 | edition = "2021"
5 |
6 | [dependencies]
7 | rand = "0.8.5"
8 | rand_distr = "0.4.3"
9 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/onebrc/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "onebrc"
3 | version = "0.1.0"
4 | edition = "2021"
5 |
6 | [profile.release]
7 | debug = true
8 |
9 |
10 | [dependencies]
11 | rustc-hash = "2.0.0"
12 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/onebrc/Cargo.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Cargo.
2 | # It is not intended for manual editing.
3 | version = 3
4 |
5 | [[package]]
6 | name = "onebrc"
7 | version = "0.1.0"
8 | dependencies = [
9 | "rustc-hash",
10 | ]
11 |
12 | [[package]]
13 | name = "rustc-hash"
14 | version = "2.0.0"
15 | source = "registry+https://github.com/rust-lang/crates.io-index"
16 | checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"
17 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # onebrc
2 |
3 | The [One Billion Row Challenge](https://1brc.dev/) asks a program to parse a simple (but very large) text file as quickly as possible.
4 |
5 | This implementation manages it in about 3.1s with a hot cache on an M2 MacBook Pro, using only safe Rust.
6 |
7 | The `onebrc` binary crate contains the implementation. `gen` is a Rust generator for the input file, for convenience.
8 | It takes a couple of minutes to run; be sure to redirect its output to a file.
--------------------------------------------------------------------------------
/gen/src/main.rs:
--------------------------------------------------------------------------------
1 | use std::env::args;
2 | use std::io::{BufWriter, Write};
3 | use std::process::exit;
4 | use gen;
5 |
6 | fn main() {
7 | let count: usize = if let [_, count_str, ..] = &args().collect::>()[..] {
8 | count_str.parse().expect("invalid count")
9 | } else {
10 | println!("Usage: gen ");
11 | exit(1);
12 | };
13 |
14 | let stdlock = std::io::stdout().lock();
15 |
16 | let mut bufout = BufWriter::new(stdlock);
17 | gen::gen(count)
18 | .for_each(|(city, temp)| {
19 | writeln!(bufout, "{city};{temp:.1}").unwrap();
20 | })
21 | }
22 |
--------------------------------------------------------------------------------
/.idea/onebrc.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/onebrc/src/main.rs:
--------------------------------------------------------------------------------
1 | use std::env::args;
2 | use std::error::Error;
3 | use std::fs::File;
4 | use std::io::{BufReader, Read, Seek, SeekFrom};
5 | use std::sync::mpsc;
6 | use onebrc::Table;
7 |
8 | fn main() -> Result<(), Box> {
9 | if let [_, filename, ..] = &args().collect::>()[..] {
10 | let mut infile = File::open(filename)?;
11 |
12 | let file_len = infile.seek(SeekFrom::End(0))?;
13 | let core_count: usize = std::thread::available_parallelism().unwrap().into();
14 | let num_chunks = core_count as u64;
15 | let mut splits: Vec<_> = (1..num_chunks).map(|i| i * (file_len/num_chunks))
16 | .map(|pos| {
17 | // seek forward to align with the start of a line
18 | infile.seek(SeekFrom::Start(pos)).unwrap();
19 | let mut b = [0u8; 1];
20 | while b[0] != b'\n' {
21 | infile.read(&mut b[..]).unwrap();
22 | }
23 | infile.stream_position().unwrap()
24 | })
25 | .collect();
26 | drop(infile);
27 |
28 | splits.insert(0, 0);
29 | splits.push(u64::MAX);
30 |
31 | let infiles: Vec<_> = splits.windows(2)
32 | .map(|splits| {
33 | let split = splits[0];
34 | let len = splits[1] - splits[0];
35 | let mut f = File::open(filename).expect("reopen failed");
36 | f.seek(SeekFrom::Start(split)).unwrap();
37 | f.take(len)
38 | })
39 | .collect();
40 |
41 | let (tx, rx) = mpsc::channel::();
42 | std::thread::scope(|s| {
43 | s.spawn(move || {
44 | let final_table = rx.iter().reduce(|mut l, r| {
45 | r.into_iter().for_each(|(k, r)| {
46 | let e = l.entry(k).or_default();
47 | e.merge(&r);
48 | });
49 | l
50 | })
51 | .unwrap();
52 | onebrc::report(&final_table).unwrap();
53 | });
54 |
55 |
56 | infiles.into_iter()
57 | .for_each(|f| {
58 | let tx = tx.clone();
59 | s.spawn(move || {
60 | let buf: BufReader<_> = BufReader::with_capacity(2 * 1024 * 1024, f);
61 | let t = onebrc::produce_table(buf);
62 | tx.send(t).expect("Send error")
63 | });
64 | });
65 | drop(tx);
66 | });
67 |
68 | Ok(())
69 | } else {
70 | println!("Usage: onebrc ");
71 | Ok(())
72 | }
73 | }
74 |
--------------------------------------------------------------------------------
/gen/src/lib.rs:
--------------------------------------------------------------------------------
1 | use rand;
2 | use rand::Rng;
3 | use rand_distr::Normal;
4 |
5 | const CITIES: [&str; 413] = ["Abha", "Abidjan", "Abéché", "Accra", "Addis Ababa", "Adelaide", "Aden", "Ahvaz", "Albuquerque", "Alexandra", "Alexandria", "Algiers", "Alice Springs", "Almaty", "Amsterdam", "Anadyr", "Anchorage", "Andorra la Vella", "Ankara", "Antananarivo", "Antsiranana", "Arkhangelsk", "Ashgabat", "Asmara", "Assab", "Astana", "Athens", "Atlanta", "Auckland", "Austin", "Baghdad", "Baguio", "Baku", "Baltimore", "Bamako", "Bangkok", "Bangui", "Banjul", "Barcelona", "Bata", "Batumi", "Beijing", "Beirut", "Belgrade", "Belize City", "Benghazi", "Bergen", "Berlin", "Bilbao", "Birao", "Bishkek", "Bissau", "Blantyre", "Bloemfontein", "Boise", "Bordeaux", "Bosaso", "Boston", "Bouaké", "Bratislava", "Brazzaville", "Bridgetown", "Brisbane", "Brussels", "Bucharest", "Budapest", "Bujumbura", "Bulawayo", "Burnie", "Busan", "Cabo San Lucas", "Cairns", "Cairo", "Calgary", "Canberra", "Cape Town", "Changsha", "Charlotte", "Chiang Mai", "Chicago", "Chihuahua", "Chittagong", "Chișinău", "Chongqing", "Christchurch", "City of San Marino", "Colombo", "Columbus", "Conakry", "Copenhagen", "Cotonou", "Cracow", "Da Lat", "Da Nang", "Dakar", "Dallas", "Damascus", "Dampier", "Dar es Salaam", "Darwin", "Denpasar", "Denver", "Detroit", "Dhaka", "Dikson", "Dili", "Djibouti", "Dodoma", "Dolisie", "Douala", "Dubai", "Dublin", "Dunedin", "Durban", "Dushanbe", "Edinburgh", "Edmonton", "El Paso", "Entebbe", "Erbil", "Erzurum", "Fairbanks", "Fianarantsoa", "Flores, Petén", "Frankfurt", "Fresno", "Fukuoka", "Gaborone", "Gabès", "Gagnoa", "Gangtok", "Garissa", "Garoua", "George Town", "Ghanzi", "Gjoa Haven", "Guadalajara", "Guangzhou", "Guatemala City", "Halifax", "Hamburg", "Hamilton", "Hanga Roa", "Hanoi", "Harare", "Harbin", "Hargeisa", "Hat Yai", "Havana", "Helsinki", "Heraklion", "Hiroshima", "Ho Chi Minh City", "Hobart", "Hong Kong", "Honiara", "Honolulu", "Houston", "Ifrane", "Indianapolis", "Iqaluit", "Irkutsk", "Istanbul", "Jacksonville", "Jakarta", "Jayapura", "Jerusalem", 
"Johannesburg", "Jos", "Juba", "Kabul", "Kampala", "Kandi", "Kankan", "Kano", "Kansas City", "Karachi", "Karonga", "Kathmandu", "Khartoum", "Kingston", "Kinshasa", "Kolkata", "Kuala Lumpur", "Kumasi", "Kunming", "Kuopio", "Kuwait City", "Kyiv", "Kyoto", "La Ceiba", "La Paz", "Lagos", "Lahore", "Lake Havasu City", "Lake Tekapo", "Las Palmas de Gran Canaria", "Las Vegas", "Launceston", "Lhasa", "Libreville", "Lisbon", "Livingstone", "Ljubljana", "Lodwar", "Lomé", "London", "Los Angeles", "Louisville", "Luanda", "Lubumbashi", "Lusaka", "Luxembourg City", "Lviv", "Lyon", "Madrid", "Mahajanga", "Makassar", "Makurdi", "Malabo", "Malé", "Managua", "Manama", "Mandalay", "Mango", "Manila", "Maputo", "Marrakesh", "Marseille", "Maun", "Medan", "Mek'ele", "Melbourne", "Memphis", "Mexicali", "Mexico City", "Miami", "Milan", "Milwaukee", "Minneapolis", "Minsk", "Mogadishu", "Mombasa", "Monaco", "Moncton", "Monterrey", "Montreal", "Moscow", "Mumbai", "Murmansk", "Muscat", "Mzuzu", "N'Djamena", "Naha", "Nairobi", "Nakhon Ratchasima", "Napier", "Napoli", "Nashville", "Nassau", "Ndola", "New Delhi", "New Orleans", "New York City", "Ngaoundéré", "Niamey", "Nicosia", "Niigata", "Nouadhibou", "Nouakchott", "Novosibirsk", "Nuuk", "Odesa", "Odienné", "Oklahoma City", "Omaha", "Oranjestad", "Oslo", "Ottawa", "Ouagadougou", "Ouahigouya", "Ouarzazate", "Oulu", "Palembang", "Palermo", "Palm Springs", "Palmerston North", "Panama City", "Parakou", "Paris", "Perth", "Petropavlovsk-Kamchatsky", "Philadelphia", "Phnom Penh", "Phoenix", "Pittsburgh", "Podgorica", "Pointe-Noire", "Pontianak", "Port Moresby", "Port Sudan", "Port Vila", "Port-Gentil", "Portland (OR)", "Porto", "Prague", "Praia", "Pretoria", "Pyongyang", "Rabat", "Rangpur", "Reggane", "Reykjavík", "Riga", "Riyadh", "Rome", "Roseau", "Rostov-on-Don", "Sacramento", "Saint Petersburg", "Saint-Pierre", "Salt Lake City", "San Antonio", "San Diego", "San Francisco", "San Jose", "San José", "San Juan", "San Salvador", "Sana'a", "Santo 
Domingo", "Sapporo", "Sarajevo", "Saskatoon", "Seattle", "Seoul", "Seville", "Shanghai", "Singapore", "Skopje", "Sochi", "Sofia", "Sokoto", "Split", "St. John's", "St. Louis", "Stockholm", "Surabaya", "Suva", "Suwałki", "Sydney", "Ségou", "Tabora", "Tabriz", "Taipei", "Tallinn", "Tamale", "Tamanrasset", "Tampa", "Tashkent", "Tauranga", "Tbilisi", "Tegucigalpa", "Tehran", "Tel Aviv", "Thessaloniki", "Thiès", "Tijuana", "Timbuktu", "Tirana", "Toamasina", "Tokyo", "Toliara", "Toluca", "Toronto", "Tripoli", "Tromsø", "Tucson", "Tunis", "Ulaanbaatar", "Upington", "Vaduz", "Valencia", "Valletta", "Vancouver", "Veracruz", "Vienna", "Vientiane", "Villahermosa", "Vilnius", "Virginia Beach", "Vladivostok", "Warsaw", "Washington, D.C.", "Wau", "Wellington", "Whitehorse", "Wichita", "Willemstad", "Winnipeg", "Wrocław", "Xi'an", "Yakutsk", "Yangon", "Yaoundé", "Yellowknife", "Yerevan", "Yinchuan", "Zagreb", "Zanzibar City", "Zürich", "Ürümqi", "İzmir"];
6 |
7 | pub fn gen(n: usize) -> impl Iterator- {
8 | let mut rng = rand::thread_rng();
9 |
10 | let city_dist = rand::distributions::Slice::new(&CITIES).unwrap();
11 | let temp_dist = Normal::new(15.0, 20.0).unwrap();
12 |
13 | (0..n).map(move |_| {
14 | let city = rng.sample(city_dist);
15 | let temp = rng.sample(temp_dist);
16 | (*city, temp)
17 | })
18 | }
--------------------------------------------------------------------------------
/Cargo.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Cargo.
2 | # It is not intended for manual editing.
3 | version = 3
4 |
5 | [[package]]
6 | name = "autocfg"
7 | version = "1.3.0"
8 | source = "registry+https://github.com/rust-lang/crates.io-index"
9 | checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
10 |
11 | [[package]]
12 | name = "byteorder"
13 | version = "1.5.0"
14 | source = "registry+https://github.com/rust-lang/crates.io-index"
15 | checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
16 |
17 | [[package]]
18 | name = "cfg-if"
19 | version = "1.0.0"
20 | source = "registry+https://github.com/rust-lang/crates.io-index"
21 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
22 |
23 | [[package]]
24 | name = "gen"
25 | version = "0.1.0"
26 | dependencies = [
27 | "rand",
28 | "rand_distr",
29 | ]
30 |
31 | [[package]]
32 | name = "getrandom"
33 | version = "0.2.15"
34 | source = "registry+https://github.com/rust-lang/crates.io-index"
35 | checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
36 | dependencies = [
37 | "cfg-if",
38 | "libc",
39 | "wasi",
40 | ]
41 |
42 | [[package]]
43 | name = "libc"
44 | version = "0.2.158"
45 | source = "registry+https://github.com/rust-lang/crates.io-index"
46 | checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439"
47 |
48 | [[package]]
49 | name = "libm"
50 | version = "0.2.8"
51 | source = "registry+https://github.com/rust-lang/crates.io-index"
52 | checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
53 |
54 | [[package]]
55 | name = "num-traits"
56 | version = "0.2.19"
57 | source = "registry+https://github.com/rust-lang/crates.io-index"
58 | checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
59 | dependencies = [
60 | "autocfg",
61 | "libm",
62 | ]
63 |
64 | [[package]]
65 | name = "onebrc"
66 | version = "0.1.0"
67 | dependencies = [
68 | "rustc-hash",
69 | ]
70 |
71 | [[package]]
72 | name = "ppv-lite86"
73 | version = "0.2.20"
74 | source = "registry+https://github.com/rust-lang/crates.io-index"
75 | checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
76 | dependencies = [
77 | "zerocopy",
78 | ]
79 |
80 | [[package]]
81 | name = "proc-macro2"
82 | version = "1.0.86"
83 | source = "registry+https://github.com/rust-lang/crates.io-index"
84 | checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
85 | dependencies = [
86 | "unicode-ident",
87 | ]
88 |
89 | [[package]]
90 | name = "quote"
91 | version = "1.0.37"
92 | source = "registry+https://github.com/rust-lang/crates.io-index"
93 | checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
94 | dependencies = [
95 | "proc-macro2",
96 | ]
97 |
98 | [[package]]
99 | name = "rand"
100 | version = "0.8.5"
101 | source = "registry+https://github.com/rust-lang/crates.io-index"
102 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
103 | dependencies = [
104 | "libc",
105 | "rand_chacha",
106 | "rand_core",
107 | ]
108 |
109 | [[package]]
110 | name = "rand_chacha"
111 | version = "0.3.1"
112 | source = "registry+https://github.com/rust-lang/crates.io-index"
113 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
114 | dependencies = [
115 | "ppv-lite86",
116 | "rand_core",
117 | ]
118 |
119 | [[package]]
120 | name = "rand_core"
121 | version = "0.6.4"
122 | source = "registry+https://github.com/rust-lang/crates.io-index"
123 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
124 | dependencies = [
125 | "getrandom",
126 | ]
127 |
128 | [[package]]
129 | name = "rand_distr"
130 | version = "0.4.3"
131 | source = "registry+https://github.com/rust-lang/crates.io-index"
132 | checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31"
133 | dependencies = [
134 | "num-traits",
135 | "rand",
136 | ]
137 |
138 | [[package]]
139 | name = "rustc-hash"
140 | version = "2.0.0"
141 | source = "registry+https://github.com/rust-lang/crates.io-index"
142 | checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"
143 |
144 | [[package]]
145 | name = "syn"
146 | version = "2.0.77"
147 | source = "registry+https://github.com/rust-lang/crates.io-index"
148 | checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed"
149 | dependencies = [
150 | "proc-macro2",
151 | "quote",
152 | "unicode-ident",
153 | ]
154 |
155 | [[package]]
156 | name = "unicode-ident"
157 | version = "1.0.12"
158 | source = "registry+https://github.com/rust-lang/crates.io-index"
159 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
160 |
161 | [[package]]
162 | name = "wasi"
163 | version = "0.11.0+wasi-snapshot-preview1"
164 | source = "registry+https://github.com/rust-lang/crates.io-index"
165 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
166 |
167 | [[package]]
168 | name = "zerocopy"
169 | version = "0.7.35"
170 | source = "registry+https://github.com/rust-lang/crates.io-index"
171 | checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
172 | dependencies = [
173 | "byteorder",
174 | "zerocopy-derive",
175 | ]
176 |
177 | [[package]]
178 | name = "zerocopy-derive"
179 | version = "0.7.35"
180 | source = "registry+https://github.com/rust-lang/crates.io-index"
181 | checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
182 | dependencies = [
183 | "proc-macro2",
184 | "quote",
185 | "syn",
186 | ]
187 |
--------------------------------------------------------------------------------
/onebrc/src/lib.rs:
--------------------------------------------------------------------------------
1 | use std::io::{BufRead, BufReader, Read, Write};
2 | use rustc_hash::{FxBuildHasher, FxHashMap as HashMap};
3 | use std::error::Error;
4 | use std::collections::BTreeMap;
5 |
/// Running summary of the temperature readings seen for one city.
///
/// The sum is kept in `f64`: an `f32` accumulator stops absorbing small
/// readings once it grows past roughly 2^24, which would skew the mean when
/// millions of rows are folded into one sample.
#[derive(Debug, Clone)]
pub struct Sample {
    /// Smallest reading seen so far.
    min: f32,
    /// Largest reading seen so far.
    max: f32,
    /// Sum of all readings.
    sum: f64,
    /// Number of readings.
    count: u64,
}

impl Default for Sample {
    /// Returns an empty sample: `min`/`max` start at the opposite extremes so
    /// that the first `add` or `merge` replaces them.
    fn default() -> Self {
        Sample {
            min: f32::MAX,
            max: f32::MIN,
            sum: 0.0,
            count: 0,
        }
    }
}

impl From<f32> for Sample {
    /// Builds a sample holding exactly one reading.
    fn from(value: f32) -> Self {
        Sample {
            min: value,
            max: value,
            sum: f64::from(value),
            count: 1,
        }
    }
}

impl Sample {
    /// Folds a single reading into the sample.
    pub fn add(&mut self, v: f32) {
        self.min = self.min.min(v);
        self.max = self.max.max(v);
        self.sum += f64::from(v);
        self.count += 1;
    }

    /// Folds every reading summarised by `other` into this sample.
    pub fn merge(&mut self, other: &Self) {
        self.min = self.min.min(other.min);
        self.max = self.max.max(other.max);
        self.sum += other.sum;
        self.count += other.count;
    }

    /// Returns the arithmetic mean of the readings (NaN for an empty sample).
    pub fn mean(&self) -> f32 {
        (self.sum / self.count as f64) as f32
    }
}
55 |
56 | pub type Table = HashMap, Sample>;
57 |
58 | fn insert_or_update(table: &mut Table, k: &[u8], v: f32) {
59 | if let Some(r) = table.get_mut(k) {
60 | r.add(v);
61 | } else {
62 | let r = Sample::from(v);
63 | table.insert(Vec::from(k), r);
64 | }
65 | }
66 |
67 | /// Takes an aligned reader and produces a summary table
68 | pub fn produce_table(mut reader: BufReader) -> Table {
69 | let mut table = Table::with_capacity_and_hasher(1000, FxBuildHasher);
70 |
71 | let mut stash = Vec::with_capacity(100);
72 |
73 | // Process the rows a page at a time. Page boundaries may split rows arbitrarily, so we have
74 | // to deal with those cases by stashing the end of one page and then fetching the next. We
75 | // assume no row is so large as to span three pages.
76 | while let Ok(mut buf) = reader.fill_buf() {
77 | if buf.is_empty() {
78 | break;
79 | }
80 | let mut it = buf.iter().enumerate();
81 | if let Some((sep, _)) = it.find(|(_, &b)| b == b';') {
82 | if let Some((end, _)) = it.find(|(_, &b)| b == b'\n') {
83 | let (name, rest) = buf.split_at(sep);
84 | let (val, _) = rest[1..].split_at(end - sep - 1);
85 |
86 | let v = parse_decimal(val);
87 |
88 | //dbg!(String::from_utf8_lossy(name), v);
89 | insert_or_update(&mut table, name, v);
90 | reader.consume(end+1);
91 | } else {
92 | // didn't get to the newline
93 | stash.extend_from_slice(buf);
94 | let consumed = buf.len();
95 | reader.consume(consumed);
96 | buf = reader.fill_buf().unwrap();
97 | let mut it = buf.iter().enumerate();
98 | if let Some((end, _)) = it.find(|(_, &b)| b == b'\n') {
99 | stash.extend_from_slice(&buf[..end]);
100 | let (name, rest) = stash.split_at(sep);
101 | let val = &rest[1..];
102 | let v = parse_decimal(val);
103 |
104 | // dbg!(String::from_utf8_lossy(name), v);
105 | insert_or_update(&mut table, name, v);
106 | reader.consume(end+1);
107 | } else {
108 | panic!("Missing newline");
109 | }
110 | }
111 | } else {
112 | // didn't find the separator
113 | stash.extend_from_slice(buf);
114 | let consumed = buf.len();
115 | reader.consume(consumed);
116 | buf = reader.fill_buf().unwrap();
117 | let mut it = buf.iter().enumerate();
118 | if let Some((sep, _)) = it.find(|(_, &b)| b == b';') {
119 | if let Some((end, _)) = it.find(|(_, &b)| b == b'\n') {
120 | let (name, rest) = buf.split_at(sep);
121 | stash.extend_from_slice(name);
122 | let (val, _) = rest[1..].split_at(end - sep - 1);
123 |
124 | let v = parse_decimal(val);
125 |
126 | // dbg!(String::from_utf8_lossy(name), v);
127 | insert_or_update(&mut table, &stash, v);
128 | reader.consume(end+1);
129 | } else {
130 | // didn't get to the newline
131 | stash.extend_from_slice(buf);
132 | let consumed = buf.len();
133 | reader.consume(consumed);
134 | buf = reader.fill_buf().unwrap();
135 | let mut it = buf.iter().enumerate();
136 | if let Some((end, _)) = it.find(|(_, &b)| b == b'\n') {
137 | stash.extend_from_slice(&buf[..end]);
138 | let (name, rest) = stash.split_at(sep);
139 | let val = &rest[1..];
140 | let v = parse_decimal(val);
141 |
142 | // dbg!(String::from_utf8_lossy(name), v);
143 | insert_or_update(&mut table, name, v);
144 | reader.consume(end+1);
145 | } else {
146 | panic!("Missing newline");
147 | }
148 | }
149 | }
150 | }
151 | stash.clear();
152 | }
153 |
154 | table
155 | }
156 |
/// Parses the simple decimal numbers used here directly from a byte slice.
///
/// Accepted characters are ASCII digits, `-` (each occurrence flips the
/// sign) and `.`; the number of bytes after the last `.` determines the
/// scale. No exponent or whitespace is supported.
///
/// # Panics
///
/// Panics if `bs` is empty or contains any other character.
fn parse_decimal(bs: &[u8]) -> f32 {
    // Without this check `bs.len() - 1` underflows: an arithmetic-overflow
    // panic in debug builds and a silent 0.0 in release builds.
    assert!(!bs.is_empty(), "empty decimal");

    // Digits are accumulated as a scaled integer; i64 leaves headroom for
    // long digit strings.
    let mut n: i64 = 0;
    let mut signum: i64 = 1;
    // Index of the decimal point; defaults to "after the last digit".
    let mut dot = bs.len() - 1;
    for (i, &b) in bs.iter().enumerate() {
        match b {
            b'-' => signum = -signum,
            b'0'..=b'9' => n = n * 10 + signum * i64::from(b - b'0'),
            b'.' => dot = i,
            _ => panic!("bad decimal character {b}"),
        }
    }

    let n = n as f32;
    // Common scales use a literal divisor to avoid the `powi` call.
    match bs.len() - 1 - dot {
        0 => n,
        1 => n / 10.0,
        2 => n / 100.0,
        3 => n / 1000.0,
        digits => n / (10.0f32).powi(digits as i32),
    }
}
190 |
191 | /// outputs the sorted report from a summary table
192 | pub fn report(table: &Table) -> Result<(), Box> {
193 | let mut stdout = std::io::stdout().lock();
194 | write!(stdout, "{{")?;
195 |
196 | let table: BTreeMap = table.iter().map(|(k, v)| {
197 | let city_str = String::from_utf8_lossy(k);
198 | (city_str.to_string(), v)
199 | })
200 | .collect();
201 | let mut first = true;
202 | for (city, record) in table.into_iter() {
203 | if !first {
204 | write!(stdout, ", ")?;
205 | } else {
206 | first = false;
207 | }
208 | write!(stdout, "{city}={:.1}/{:.1}/{:.1}", record.min, record.mean(), record.max)?;
209 | }
210 | writeln!(stdout, "}}")?;
211 | Ok(())
212 | }
213 |
--------------------------------------------------------------------------------