├── .github └── workflows │ └── rust.yml ├── .gitignore ├── Cargo.toml ├── FAQ.md ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── benchmark_tools ├── Cargo.toml ├── benchmark_tools.iml └── src │ ├── data_reader.rs │ ├── main.rs │ └── persisting_hasher.rs ├── build.rs ├── compare ├── Cargo.toml ├── Table.png ├── readme.md ├── resources │ └── sheet.css ├── src │ └── main.rs └── tests │ └── compare.rs ├── no_std_test ├── Cargo.toml └── src │ └── main.rs ├── rustfmt.toml ├── smhasher ├── ahash-cbindings │ ├── Cargo.toml │ ├── install.sh │ └── src │ │ └── lib.rs ├── ahashOutput.txt ├── clone_smhasher.sh ├── fallbackNoFoldedOutput.txt └── fallbackOutput.txt ├── src ├── aes_hash.rs ├── convert.rs ├── fallback_hash.rs ├── hash_map.rs ├── hash_quality_test.rs ├── hash_set.rs ├── lib.rs ├── operations.rs ├── random_state.rs └── specialize.rs └── tests ├── bench.rs ├── map_tests.rs └── nopanic.rs /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | - name: Install latest stable 11 | uses: dtolnay/rust-toolchain@master 12 | with: 13 | toolchain: stable 14 | components: clippy 15 | - name: check nostd 16 | run: cargo check --no-default-features 17 | - name: test nostd 18 | run: cargo test --no-default-features 19 | - name: check constrandom 20 | run: cargo check --no-default-features --features compile-time-rng 21 | - name: test constrandom 22 | run: cargo test --no-default-features --features compile-time-rng 23 | - name: check fixed-seed 24 | run: cargo check --no-default-features --features std 25 | - name: check 26 | run: cargo check 27 | - name: test 28 | run: cargo test 29 | nightly: 30 | name: nightly 31 | runs-on: ubuntu-latest 32 | env: 33 | RUSTFLAGS: -C target-cpu=native 34 | steps: 35 | - uses: actions/checkout@v4 36 | - name: Install latest nightly 37 | uses: 
dtolnay/rust-toolchain@master 38 | with: 39 | toolchain: nightly 40 | components: clippy 41 | - name: check nightly 42 | run: cargo check -Z msrv-policy 43 | - name: test nightly 44 | run: cargo test 45 | - name: check serde 46 | run: cargo check --features serde 47 | - name: test serde 48 | run: cargo test --features serde 49 | linux_arm7: 50 | name: Linux ARMv7 51 | runs-on: ubuntu-latest 52 | steps: 53 | - uses: actions/checkout@v4 54 | - uses: dtolnay/rust-toolchain@master 55 | with: 56 | toolchain: stable 57 | targets: armv7-unknown-linux-gnueabihf 58 | - run: cargo check --target armv7-unknown-linux-gnueabihf 59 | - name: Install 1.72.0 60 | uses: dtolnay/rust-toolchain@master 61 | with: 62 | toolchain: 1.72.0 63 | targets: armv7-unknown-linux-gnueabihf 64 | - run: cargo +1.72.0 check --target armv7-unknown-linux-gnueabihf 65 | aarch64-apple-darwin: 66 | name: Aarch64 Apple Darwin 67 | runs-on: macos-latest 68 | steps: 69 | - uses: actions/checkout@v4 70 | - uses: dtolnay/rust-toolchain@master 71 | with: 72 | toolchain: stable 73 | targets: aarch64-apple-darwin 74 | - run: cargo check --target aarch64-apple-darwin 75 | - run: cargo test 76 | - run: cargo test --no-default-features --features compile-time-rng 77 | - name: Install 1.72.0 78 | uses: dtolnay/rust-toolchain@master 79 | with: 80 | toolchain: 1.72.0 81 | targets: aarch64-apple-darwin 82 | - run: cargo +1.72.0 check --target aarch64-apple-darwin 83 | i686-unknown-linux-gnu: 84 | name: Linux i686 85 | runs-on: ubuntu-latest 86 | steps: 87 | - uses: actions/checkout@v4 88 | - uses: dtolnay/rust-toolchain@master 89 | with: 90 | toolchain: stable 91 | targets: i686-unknown-linux-gnu 92 | - name: Install cross compile tools 93 | run: sudo apt-get install -y gcc-multilib libc6-i386 libc6-dev-i386 94 | - run: cargo check --target i686-unknown-linux-gnu 95 | - run: cargo test --target i686-unknown-linux-gnu 96 | - name: check constrandom 97 | run: cargo check --no-default-features --features compile-time-rng 
--target i686-unknown-linux-gnu 98 | - name: Install 1.72.0 99 | uses: dtolnay/rust-toolchain@master 100 | with: 101 | toolchain: 1.72.0 102 | targets: i686-unknown-linux-gnu 103 | - run: cargo +1.72.0 check --target i686-unknown-linux-gnu 104 | - name: check constrandom 105 | run: cargo +1.72.0 check --no-default-features --features compile-time-rng --target i686-unknown-linux-gnu 106 | x86_64-unknown-linux-gnu: 107 | name: Linux x86_64 108 | runs-on: ubuntu-latest 109 | env: 110 | RUSTFLAGS: -C target-cpu=skylake -C target-feature=+aes 111 | steps: 112 | - uses: actions/checkout@v4 113 | - uses: dtolnay/rust-toolchain@master 114 | with: 115 | toolchain: nightly 116 | targets: x86_64-unknown-linux-gnu 117 | - run: cargo check --target x86_64-unknown-linux-gnu 118 | - run: cargo test --target x86_64-unknown-linux-gnu 119 | - name: check constrandom 120 | run: cargo check --no-default-features --features compile-time-rng --target x86_64-unknown-linux-gnu 121 | - name: Install 1.72.0 122 | uses: dtolnay/rust-toolchain@master 123 | with: 124 | toolchain: 1.72.0 125 | - run: cargo +1.72.0 check --target x86_64-unknown-linux-gnu 126 | - name: check constrandom 127 | run: cargo +1.72.0 check --no-default-features --features compile-time-rng --target x86_64-unknown-linux-gnu 128 | thumbv6m: 129 | name: thumbv6m 130 | runs-on: ubuntu-latest 131 | steps: 132 | - uses: actions/checkout@v4 133 | - uses: dtolnay/rust-toolchain@master 134 | with: 135 | toolchain: stable 136 | targets: thumbv6m-none-eabi 137 | - run: cargo check --target thumbv6m-none-eabi --no-default-features 138 | wasm32-unknown-unknown: 139 | name: wasm 140 | runs-on: ubuntu-latest 141 | steps: 142 | - uses: actions/checkout@v4 143 | - uses: dtolnay/rust-toolchain@master 144 | with: 145 | toolchain: stable 146 | targets: wasm32-unknown-unknown 147 | - run: cargo check --target wasm32-unknown-unknown --no-default-features 148 | no_std: 149 | name: no-std build 150 | runs-on: ubuntu-latest 151 | steps: 152 | - 
uses: actions/checkout@v4 153 | - uses: dtolnay/rust-toolchain@master 154 | with: 155 | toolchain: nightly 156 | - run: cargo build --manifest-path=no_std_test/Cargo.toml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | Cargo.lock 3 | target 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ahash" 3 | version = "0.8.10" 4 | authors = ["Tom Kaitchuck "] 5 | license = "MIT OR Apache-2.0" 6 | description = "A non-cryptographic hash function using AES-NI for high performance" 7 | documentation = "https://docs.rs/ahash" 8 | repository = "https://github.com/tkaitchuck/ahash" 9 | keywords = ["hash", "hasher", "hashmap", "aes", "no-std"] 10 | categories = ["algorithms", "data-structures", "no-std"] 11 | edition = "2018" 12 | readme = "README.md" 13 | build = "./build.rs" 14 | exclude = ["/smhasher", "/benchmark_tools"] 15 | rust-version = "1.60.0" 16 | 17 | [lib] 18 | name = "ahash" 19 | path = "src/lib.rs" 20 | test = true 21 | doctest = true 22 | bench = true 23 | doc = true 24 | 25 | [features] 26 | default = ["std", "runtime-rng"] 27 | 28 | # Enabling this will enable `AHashMap` and `AHashSet`. 29 | std = [] 30 | 31 | # Runtime random key generation using getrandom. 32 | runtime-rng = ["getrandom"] 33 | 34 | # This is an alternative to runtime key generation which does compile time key generation if runtime-rng is not available. 35 | # (If runtime-rng is enabled this does nothing.) 36 | # If this is on (and runtime-rng is off) it implies the produced binary will not be identical. 37 | # If this is disabled and runtime-rng is unavailable constant keys are used. 
38 | compile-time-rng = ["const-random"] 39 | 40 | # Do not use any random number generator (either at compile time or runtime) 41 | # If either runtime-rng or compile-time-rng are enabled this does nothing. 42 | no-rng = [] 43 | 44 | # in case this is being used on an architecture lacking core::sync::atomic::AtomicUsize and friends 45 | atomic-polyfill = [ "dep:portable-atomic", "once_cell/critical-section"] 46 | 47 | # Nightly-only support for AES intrinsics on 32-bit ARM 48 | nightly-arm-aes = [] 49 | 50 | [[bench]] 51 | name = "ahash" 52 | path = "tests/bench.rs" 53 | harness = false 54 | 55 | [[bench]] 56 | name = "map" 57 | path = "tests/map_tests.rs" 58 | harness = false 59 | 60 | [profile.test] 61 | opt-level = 2 62 | lto = 'fat' 63 | 64 | [profile.release] 65 | opt-level = 3 66 | debug = false 67 | lto = 'fat' 68 | debug-assertions = false 69 | codegen-units = 1 70 | 71 | [profile.bench] 72 | opt-level = 3 73 | debug = false 74 | lto = 'fat' 75 | debug-assertions = false 76 | codegen-units = 1 77 | 78 | [build-dependencies] 79 | version_check = "0.9.4" 80 | 81 | [dependencies] 82 | const-random = { version = "0.1.17", optional = true } 83 | serde = { version = "1.0.117", optional = true } 84 | cfg-if = "1.0" 85 | portable-atomic = { version = "1.0.0", optional = true } 86 | getrandom = { version = "0.3.1", optional = true } 87 | zerocopy = { version = "0.8.24", default-features = false, features = ["simd"] } 88 | 89 | [target.'cfg(not(all(target_arch = "arm", target_os = "none")))'.dependencies] 90 | once_cell = { version = "1.18.0", default-features = false, features = ["alloc"] } 91 | 92 | [dev-dependencies] 93 | no-panic = "0.1.10" 94 | criterion = {version = "0.3.2", features = ["html_reports"] } 95 | seahash = "4.0" 96 | fnv = "1.0.5" 97 | fxhash = "0.2.1" 98 | hex = "0.4.2" 99 | rand = "0.8.5" 100 | pcg-mwc = "0.2.1" 101 | serde_json = "1.0.59" 102 | hashbrown = "0.14.3" 103 | smallvec = "1.13.1" 104 | 105 | [package.metadata.docs.rs] 106 | 
rustc-args = ["-C", "target-feature=+aes"] 107 | rustdoc-args = ["-C", "target-feature=+aes"] 108 | features = ["std"] 109 | -------------------------------------------------------------------------------- /FAQ.md: -------------------------------------------------------------------------------- 1 | ## How does aHash prevent DOS attacks 2 | 3 | AHash is designed to [prevent an adversary that does not know the key from being able to create hash collisions or partial collisions.](https://github.com/tkaitchuck/aHash/wiki/How-aHash-is-resists-DOS-attacks) 4 | 5 | If you are a cryptographer and would like to help review aHash's algorithm, please post a comment [here](https://github.com/tkaitchuck/aHash/issues/11). 6 | 7 | In short, this is achieved by ensuring that: 8 | 9 | * aHash is designed to [resist differential cryptanalysis](https://github.com/tkaitchuck/aHash/wiki/How-aHash-is-resists-DOS-attacks#differential-analysis), meaning it should not be possible to devise a scheme to "cancel" out a modification of the internal state from a block of input via some corresponding change in a subsequent block of input. 10 | * This is achieved by not performing any "premixing" - This reversible mixing gave previous hashes such as murmurhash confidence in their quality, but could be undone by a deliberate attack. 11 | * Before it is used each chunk of input is "masked" such as by xoring it with an unpredictable value. 12 | * aHash obeys the '[strict avalanche criterion](https://en.wikipedia.org/wiki/Avalanche_effect#Strict_avalanche_criterion)': 13 | Each bit of input has the potential to flip every bit of the output. 14 | * Similarly, each bit in the key can affect every bit in the output. 15 | * Input bits never affect just one, or a very few, bits in intermediate state.
This is specifically designed to prevent the sort of 16 | [differential attacks launched by the sipHash authors](https://emboss.github.io/blog/2012/12/14/breaking-murmur-hash-flooding-dos-reloaded/) which cancel previous inputs. 17 | * The `finish` call at the end of the hash is designed to not expose individual bits of the internal state. 18 | * For example in the main algorithm 256bits of state and 256bits of keys are reduced to 64 total bits using 3 rounds of AES encryption. 19 | Reversing this is more than non-trivial. Most of the information is by definition gone, and any given bit of the internal state is fully diffused across the output. 20 | * In both aHash and its fallback the internal state is divided into two halves which are updated by two unrelated techniques using the same input. This means that if there is a way to attack one of them it likely won't be able to attack both of them at the same time. 21 | * It is deliberately difficult to 'chain' collisions. (This has been the major technique used to weaponize attacks on other hash functions) 22 | 23 | More details are available on [the wiki](https://github.com/tkaitchuck/aHash/wiki/How-aHash-is-resists-DOS-attacks). 24 | 25 | ## Why not use a cryptographic hash in a hashmap? 26 | 27 | Cryptographic hashes are designed to make it nearly impossible to find two items that collide when the attacker has full control 28 | over the input. This has several implications: 29 | 30 | * They are very difficult to construct, and have to go to a lot of effort to ensure that collisions are not possible. 31 | * They have no notion of a 'key'. Rather, they are fully deterministic and provide exactly one hash for a given input. 32 | 33 | For a HashMap the requirements are different. 34 | 35 | * Speed is very important, especially for short inputs. Often the key for a HashMap is a single `u32` or similar, and to be effective 36 | the bucket that it should be hashed to needs to be computed in just a few CPU cycles.
37 | * A hashmap does not need to provide a hard and fast guarantee that no two inputs will ever collide. Hence, hashCodes are not 256bits 38 | but are just 64 or 32 bits in length. Often the first thing done with the hashcode is to truncate it further to compute which among a few buckets should be used for a key. 39 | * Here collisions are expected, and are cheap to deal with provided there is no systematic way to generate huge numbers of values that all 40 | go to the same bucket. 41 | * This also means that, unlike a cryptographic hash, partial collisions matter. It doesn't do a hashmap any good to produce a unique 256bit hash if 42 | the lower 12 bits are all the same. This means that even a provably irreversible hash would not offer protection from a DOS attack in a hashmap 43 | because an attacker can easily just brute force the bottom N bits. 44 | 45 | From a cryptography point of view, a hashmap needs something closer to a block cypher, 46 | where the input can be quickly mixed in a way that cannot be reversed without knowing a key. 47 | 48 | ## Why isn't aHash cryptographically secure 49 | 50 | It is not designed to be. 51 | Attempting to use aHash as a secure hash will likely fail to hold up for several reasons: 52 | 53 | 1. aHash relies on random keys which are assumed to not be observable by an attacker. For a cryptographic hash all inputs can be seen and controlled by the attacker. 54 | 2. aHash has not yet gone through peer review, which is a pre-requisite for security critical algorithms. 55 | 3. Because aHash uses reduced rounds of AES as opposed to the standard of 10, things like the SQUARE attack apply to part of the internal state. 56 | (These are mitigated by other means to prevent producing collisions, but would be a problem in other contexts). 57 | 4. Like any cypher based hash, it will show certain statistical deviations from truly random output when comparing a (VERY) large number of hashes.
58 | (By definition cyphers have fewer collisions than truly random data.) 59 | 60 | There are efforts to build a secure hash function that uses AES-NI for acceleration, but aHash is not one of them. 61 | 62 | ## How is aHash so fast 63 | 64 | AHash uses a number of tricks. 65 | 66 | One trick is taking advantage of specialization. If aHash is compiled on nightly it will take 67 | advantage of specialized hash implementations for strings, slices, and primitives. 68 | 69 | Another is taking advantage of hardware instructions. 70 | When it is available aHash uses AES rounds using the AES-NI instruction. AES-NI is very fast (on an intel i7-6700 it 71 | is as fast as a 64 bit multiplication.) and handles 16 bytes of input at a time, while being a very strong permutation. 72 | 73 | This is obviously much faster than most standard approaches to hashing, and does a better job of scrambling data than most non-secure hashes. 74 | 75 | On an intel i7-6700 compiled on nightly Rust with flags `-C opt-level=3 -C target-cpu=native -C codegen-units=1`: 76 | 77 | | Input | SipHash 1-3 time | FnvHash time|FxHash time| aHash time| aHash Fallback* | 78 | |----------------|-----------|-----------|-----------|-----------|---------------| 79 | | u8 | 9.3271 ns | 0.808 ns | **0.594 ns** | 0.7704 ns | 0.7664 ns | 80 | | u16 | 9.5139 ns | 0.803 ns | **0.594 ns** | 0.7653 ns | 0.7704 ns | 81 | | u32 | 9.1196 ns | 1.4424 ns | **0.594 ns** | 0.7637 ns | 0.7712 ns | 82 | | u64 | 10.854 ns | 3.0484 ns | **0.628 ns** | 0.7788 ns | 0.7888 ns | 83 | | u128 | 12.465 ns | 7.0728 ns | 0.799 ns | **0.6174 ns** | 0.6250 ns | 84 | | 1 byte string | 11.745 ns | 2.4743 ns | 2.4000 ns | **1.4921 ns** | 1.5861 ns | 85 | | 3 byte string | 12.066 ns | 3.5221 ns | 2.9253 ns | **1.4745 ns** | 1.8518 ns | 86 | | 4 byte string | 11.634 ns | 4.0770 ns | 1.8818 ns | **1.5206 ns** | 1.8924 ns | 87 | | 7 byte string | 14.762 ns | 5.9780 ns | 3.2282 ns | **1.5207 ns** | 1.8933 ns | 88 | | 8 byte string | 13.442 ns | 
4.0535 ns | 2.9422 ns | **1.6262 ns** | 1.8929 ns | 89 | | 15 byte string | 16.880 ns | 8.3434 ns | 4.6070 ns | **1.6265 ns** | 1.7965 ns | 90 | | 16 byte string | 15.155 ns | 7.5796 ns | 3.2619 ns | **1.6262 ns** | 1.8011 ns | 91 | | 24 byte string | 16.521 ns | 12.492 ns | 3.5424 ns | **1.6266 ns** | 2.8311 ns | 92 | | 68 byte string | 24.598 ns | 50.715 ns | 5.8312 ns | **4.8282 ns** | 5.4824 ns | 93 | | 132 byte string| 39.224 ns | 119.96 ns | 11.777 ns | **6.5087 ns** | 9.1459 ns | 94 | |1024 byte string| 254.00 ns | 1087.3 ns | 156.41 ns | **25.402 ns** | 54.566 ns | 95 | 96 | * Fallback refers to the algorithm aHash would use if AES instructions are unavailable. 97 | For reference, a hash that does nothing (not even reading the input data) takes **0.520 ns**. So that represents the fastest 98 | possible time. 99 | 100 | As you can see above aHash like `FxHash` provides a large speedup over `SipHash-1-3` which is already nearly twice as fast as `SipHash-2-4`. 101 | 102 | Rust's HashMap by default uses `SipHash-1-3` because faster hash functions such as `FxHash` are predictable and vulnerable to denial of 103 | service attacks, while `aHash` has both very strong scrambling and very high performance. 104 | 105 | AHash performs well when dealing with large inputs because aHash reads 8 or 16 bytes at a time (depending on availability of AES-NI). 106 | 107 | Because of this, and its optimized logic, `aHash` is able to outperform `FxHash` with strings. 108 | It also provides especially good performance dealing with unaligned input. 109 | (Notice the big performance gaps between 3 vs 4, 7 vs 8 and 15 vs 16 in `FxHash` above) 110 | 111 | ### Which CPUs can use the hardware acceleration 112 | 113 | Hardware AES instructions are built into Intel processors built after 2010 and AMD processors after 2012. 114 | They are also available on [many other CPUs](https://en.wikipedia.org/wiki/AES_instruction_set), so it should eventually 115 | be possible to get aHash to work on them.
However, X86 and X86-64 are the only supported architectures at the moment, as currently 116 | they are the only architectures for which Rust provides an intrinsic. 117 | 118 | aHash also uses `sse2` and `sse3` instructions. X86 processors that have `aesni` also have these instruction sets. 119 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types.
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018 Tom Kaitchuck 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # aHash ![Build Status](https://img.shields.io/github/actions/workflow/status/tkaitchuck/aHash/rust.yml?branch=master) ![Licence](https://img.shields.io/crates/l/ahash) ![Downloads](https://img.shields.io/crates/d/ahash) 2 | 3 | AHash is the [fastest](https://github.com/tkaitchuck/aHash/blob/master/compare/readme.md#Speed), 4 | [DOS resistant hash](https://github.com/tkaitchuck/aHash/wiki/How-aHash-is-resists-DOS-attacks) currently available in Rust. 5 | AHash is intended *exclusively* for use in in-memory hashmaps. 6 | 7 | AHash's output is of [high quality](https://github.com/tkaitchuck/aHash/blob/master/compare/readme.md#Quality) but aHash is **not** a cryptographically secure hash. 8 | 9 | ## Design 10 | 11 | Because AHash is a keyed hash, each map will produce completely different hashes, which cannot be predicted without knowing the keys. 12 | [This prevents DOS attacks where an attacker sends a large number of items whose hashes collide that get used as keys in a hashmap.](https://github.com/tkaitchuck/aHash/wiki/How-aHash-is-resists-DOS-attacks) 13 | 14 | This also avoids [accidentally quadratic behavior by reading from one map and writing to another.](https://accidentallyquadratic.tumblr.com/post/153545455987/rust-hash-iteration-reinsertion) 15 | 16 | ## Goals and Non-Goals 17 | 18 | AHash does *not* have a fixed standard for its output. This allows it to improve over time. For example, 19 | if any faster algorithm is found, aHash will be updated to incorporate the technique. 
20 | Similarly, should any flaw in aHash's DOS resistance be found, aHash will be changed to correct the flaw. 21 | 22 | Because it does not have a fixed standard, different computers or computers on different versions of the code will observe different hash values. 23 | As such, aHash is not recommended for use other than in in-memory maps. Specifically, aHash is not intended for network use or in applications which persist hashed values. 24 | (In these cases `HighwayHash` would be a better choice) 25 | 26 | Additionally, aHash is not intended to be cryptographically secure and should not be used as a MAC, or anywhere which requires a cryptographically secure hash. 27 | (In these cases `SHA-3` would be a better choice) 28 | 29 | ## Usage 30 | 31 | AHash is a drop in replacement for the default implementation of the `Hasher` trait. To construct a `HashMap` using aHash 32 | as its hasher do the following: 33 | 34 | ```rust 35 | use ahash::{AHasher, RandomState}; 36 | use std::collections::HashMap; 37 | 38 | let mut map: HashMap<i32, i32, RandomState> = HashMap::default(); 39 | map.insert(12, 34); 40 | ``` 41 | For convenience, wrappers called `AHashMap` and `AHashSet` are also provided. 42 | These do the same thing with slightly less typing. 43 | ```rust 44 | use ahash::AHashMap; 45 | 46 | let mut map: AHashMap<i32, i32> = AHashMap::new(); 47 | map.insert(12, 34); 48 | map.insert(56, 78); 49 | ``` 50 | 51 | ## Flags 52 | 53 | The aHash package has the following flags: 54 | * `std`: This enables features which require the standard library. (On by default) This includes providing the utility classes `AHashMap` and `AHashSet`. 55 | * `serde`: Enables `serde` support for the utility classes `AHashMap` and `AHashSet`. 56 | * `runtime-rng`: To obtain seeds, Hashers will obtain randomness from the operating system. (On by default) 57 | This is done using the [getrandom](https://github.com/rust-random/getrandom) crate.
58 | * `compile-time-rng`: For OS targets without access to a random number generator, `compile-time-rng` provides an alternative. 59 | If `getrandom` is unavailable and `compile-time-rng` is enabled, aHash will generate random numbers at compile time and embed them in the binary. 60 | * `nightly-arm-aes`: To use AES instructions on 32-bit ARM, which requires nightly. This is not needed on AArch64. 61 | This allows for DOS resistance even if there is no random number generator available at runtime (assuming the compiled binary is not public). 62 | This makes the binary non-deterministic. (If non-determinism is a problem see [constrandom's documentation](https://github.com/tkaitchuck/constrandom#deterministic-builds)) 63 | 64 | If both `runtime-rng` and `compile-time-rng` are enabled the `runtime-rng` will take precedence and `compile-time-rng` will do nothing. 65 | If neither flag is set, seeds can be supplied by the application. [Multiple apis](https://docs.rs/ahash/latest/ahash/random_state/struct.RandomState.html) 66 | are available to do this. 67 | 68 | ## Comparison with other hashers 69 | 70 | A full comparison with other hashing algorithms can be found [here](https://github.com/tkaitchuck/aHash/blob/master/compare/readme.md) 71 | 72 | ![Hasher performance](https://docs.google.com/spreadsheets/d/e/2PACX-1vSK7Li2nS-Bur9arAYF9IfT37MP-ohAe1v19lZu5fd9MajI1fSveLAQZyEie4Ea9k5-SWHTff7nL2DW/pubchart?oid=1323618938&format=image) 73 | 74 | For a more representative performance comparison which includes the overhead of using a HashMap, see [HashBrown's benchmarks](https://github.com/rust-lang/hashbrown#performance) 75 | as HashBrown now uses aHash as its hasher by default. 76 | 77 | ## Hash quality 78 | 79 | AHash passes the full [SMHasher test suite](https://github.com/rurban/smhasher). 80 | 81 | The code to reproduce the result, and the full output [are checked into the repo](https://github.com/tkaitchuck/aHash/tree/master/smhasher). 
82 | 83 | ## Additional FAQ 84 | 85 | A separate FAQ document is maintained [here](https://github.com/tkaitchuck/aHash/blob/master/FAQ.md). 86 | If you have questions not covered there, open an issue [here](https://github.com/tkaitchuck/aHash/issues). 87 | 88 | ## License 89 | 90 | Licensed under either of: 91 | 92 | * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) 93 | * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) 94 | 95 | at your option. 96 | 97 | ## Contribution 98 | 99 | Unless you explicitly state otherwise, any contribution intentionally submitted 100 | for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any 101 | additional terms or conditions. 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /benchmark_tools/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "aHash" 3 | version = "0.1.0" 4 | edition = "2018" 5 | publish = false 6 | description = "A tool for benchmarking hashing algorithms" 7 | 8 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 9 | 10 | [dependencies] 11 | once_cell = "1.8.0" 12 | byteorder = "1.4.3" 13 | ahash = "0.7.4" -------------------------------------------------------------------------------- /benchmark_tools/benchmark_tools.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /benchmark_tools/src/data_reader.rs: -------------------------------------------------------------------------------- 1 | use std::fs::File; 2 | use std::hash::{BuildHasher, Hasher}; 3 | use std::io::{BufReader, BufRead, Error, Read}; 4 | use 
byteorder::{ReadBytesExt, LittleEndian}; 5 | 6 | pub fn test_hasher(input_file: File, builder: B) -> Result { 7 | let mut result: u64 = 0; 8 | let mut input = BufReader::new(input_file); 9 | let mut hasher = builder.build_hasher(); 10 | while input.has_data_left()? { 11 | let code = input.read_u8()?; 12 | match code { 13 | b'1' => { 14 | let i = input.read_u8()?; 15 | hasher.write_u8(i); 16 | } 17 | b'2' => { 18 | let i = input.read_u16::()?; 19 | hasher.write_u16(i); 20 | } 21 | b'4' => { 22 | let i = input.read_u32::()?; 23 | hasher.write_u32(i); 24 | } 25 | b'8' => { 26 | let i = input.read_u64::()?; 27 | hasher.write_u64(i); 28 | } 29 | b'B' => { 30 | let i = input.read_u128::()?; 31 | hasher.write_u128(i); 32 | } 33 | b'u' => { 34 | let i = input.read_u64::()?; 35 | hasher.write_usize(i as usize); 36 | } 37 | b's' => { 38 | let len = input.read_u32::()?; 39 | let mut slice = vec![0; len as usize]; 40 | input.read_exact(&mut slice[..])?; 41 | hasher.write(&slice[..]); 42 | } 43 | b'f' => { 44 | result = result.wrapping_add(hasher.finish()); 45 | hasher = builder.build_hasher(); 46 | } 47 | code => panic!("Unexpected code: {}", code) 48 | } 49 | } 50 | Ok(result) 51 | } -------------------------------------------------------------------------------- /benchmark_tools/src/main.rs: -------------------------------------------------------------------------------- 1 | #![feature(buf_read_has_data_left)] 2 | 3 | mod persisting_hasher; 4 | mod data_reader; 5 | 6 | use persisting_hasher::*; 7 | use data_reader::*; 8 | use std::collections::HashMap; 9 | use std::fs::File; 10 | use std::time::SystemTime; 11 | use std::alloc::System; 12 | 13 | fn capture_output_example() { 14 | let builder = PersistingHasherBuilder::default(); 15 | let mut map = HashMap::with_capacity_and_hasher(10, builder); 16 | map.insert(1, 2); 17 | map.insert(3, 4); 18 | let builder = PersistingHasherBuilder::default(); 19 | let mut map = HashMap::with_capacity_and_hasher(10, builder); 20 | 
map.insert("1", 2); 21 | map.insert("3", 4); 22 | PersistingHasherBuilder::default().flush(); 23 | } 24 | 25 | fn main() { 26 | // capture_output_example(); 27 | 28 | //Given a previously captured set of hashed data, time how long it takes using a different algorithm. 29 | let file = File::open("hash_output-295253").unwrap(); 30 | let rand = ahash::RandomState::new(); 31 | let start_time = SystemTime::now(); 32 | let result = test_hasher(file, rand).unwrap(); 33 | println!("Completed after {:?} with result: {:x}", SystemTime::now().duration_since(start_time).unwrap(), result) 34 | } 35 | -------------------------------------------------------------------------------- /benchmark_tools/src/persisting_hasher.rs: -------------------------------------------------------------------------------- 1 | use std::convert::TryInto; 2 | use std::fs::File; 3 | use std::hash::{Hasher, BuildHasher}; 4 | use std::io::{BufWriter, Write}; 5 | use std::sync::{Arc, Mutex}; 6 | use std::sync::atomic::{AtomicU64, Ordering}; 7 | use once_cell::sync::OnceCell; 8 | use std::process::id; 9 | 10 | static GLOBAL_COUNT: AtomicU64 = AtomicU64::new(0); 11 | static GLOBAL_OUT: OnceCell>>> = OnceCell::new(); 12 | 13 | pub struct PersistingHasherBuilder { 14 | id: u64, 15 | out: Arc>>, 16 | } 17 | 18 | impl PersistingHasherBuilder { 19 | pub fn flush(&self) { 20 | let mut guard = self.out.lock().unwrap(); 21 | guard.flush().unwrap(); 22 | } 23 | } 24 | 25 | impl Default for PersistingHasherBuilder { 26 | fn default() -> Self { 27 | PersistingHasherBuilder { 28 | id: GLOBAL_COUNT.fetch_add(1, Ordering::SeqCst), 29 | out: GLOBAL_OUT.get_or_init(|| 30 | Arc::new(Mutex::new(BufWriter::new(File::create( 31 | format!("hash_output-{}", id())).unwrap())))).clone(), 32 | } 33 | } 34 | } 35 | 36 | impl BuildHasher for PersistingHasherBuilder { 37 | type Hasher = PersistingHasher; 38 | 39 | fn build_hasher(&self) -> Self::Hasher { 40 | PersistingHasher { 41 | hash: self.id, 42 | out: self.out.clone(), 43 | } 44 
| } 45 | } 46 | 47 | pub struct PersistingHasher { 48 | /// Used to compute a hash 49 | hash: u64, 50 | /// File to write data out to 51 | out: Arc>>, 52 | } 53 | 54 | impl PersistingHasher { 55 | fn add_to_hash(&mut self, i: u64) { 56 | self.hash = self 57 | .hash 58 | .rotate_right(31) 59 | .wrapping_add(i) 60 | .wrapping_mul(0xcfee444d8b59a89b); 61 | } 62 | } 63 | 64 | impl Hasher for PersistingHasher { 65 | fn finish(&self) -> u64 { 66 | let mut guard = self.out.lock().unwrap(); 67 | write!(guard, "f").unwrap(); 68 | self.hash 69 | } 70 | 71 | fn write(&mut self, mut bytes: &[u8]) { 72 | let read_u64 = |bytes: &[u8]| u64::from_ne_bytes(bytes[..8].try_into().unwrap()); 73 | 74 | while bytes.len() >= 8 { 75 | self.add_to_hash(read_u64(bytes)); 76 | bytes = &bytes[8..]; 77 | } 78 | if bytes.len() >= 4 { 79 | self.add_to_hash(u32::from_ne_bytes(bytes[..4].try_into().unwrap()) as u64); 80 | bytes = &bytes[4..]; 81 | } 82 | if bytes.len() >= 2 { 83 | self.add_to_hash(u16::from_ne_bytes(bytes[..2].try_into().unwrap()) as u64); 84 | bytes = &bytes[2..]; 85 | } 86 | if bytes.len() >= 1 { 87 | self.add_to_hash(bytes[0] as u64); 88 | } 89 | 90 | let mut guard = self.out.lock().unwrap(); 91 | write!(guard, "s").unwrap(); 92 | guard.write_all(&(bytes.len() as u32).to_le_bytes()).unwrap(); 93 | guard.write_all(bytes).unwrap(); 94 | } 95 | 96 | fn write_u8(&mut self, i: u8) { 97 | self.add_to_hash(i as u64); 98 | 99 | let mut guard = self.out.lock().unwrap(); 100 | write!(guard, "1").unwrap(); 101 | guard.write_all(&i.to_le_bytes()).unwrap(); 102 | } 103 | 104 | fn write_u16(&mut self, i: u16) { 105 | self.add_to_hash(i as u64); 106 | 107 | let mut guard = self.out.lock().unwrap(); 108 | write!(guard, "2").unwrap(); 109 | guard.write_all(&i.to_le_bytes()).unwrap(); 110 | } 111 | 112 | fn write_u32(&mut self, i: u32) { 113 | self.add_to_hash(i as u64); 114 | 115 | let mut guard = self.out.lock().unwrap(); 116 | write!(guard, "4").unwrap(); 117 | 
guard.write_all(&i.to_le_bytes()).unwrap(); 118 | } 119 | 120 | fn write_u64(&mut self, i: u64) { 121 | self.add_to_hash(i as u64); 122 | 123 | let mut guard = self.out.lock().unwrap(); 124 | write!(guard, "8").unwrap(); 125 | guard.write_all(&i.to_le_bytes()).unwrap(); 126 | } 127 | 128 | fn write_u128(&mut self, i: u128) { 129 | self.add_to_hash((i >> 64) as u64); 130 | self.add_to_hash(i as u64); 131 | 132 | let mut guard = self.out.lock().unwrap(); 133 | write!(guard, "B").unwrap(); 134 | guard.write_all(&i.to_le_bytes()).unwrap(); 135 | } 136 | 137 | fn write_usize(&mut self, i: usize) { 138 | self.add_to_hash(i as u64); 139 | 140 | let mut guard = self.out.lock().unwrap(); 141 | write!(guard, "u").unwrap(); 142 | guard.write_all(&(i as u64).to_le_bytes()).unwrap(); 143 | } 144 | } -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | #![deny(warnings)] 2 | 3 | use std::env; 4 | 5 | fn main() { 6 | println!("cargo:rerun-if-changed=build.rs"); 7 | println!("cargo:rustc-check-cfg=cfg(specialize)"); 8 | if let Some(true) = version_check::supports_feature("specialize") { 9 | println!("cargo:rustc-cfg=specialize"); 10 | } 11 | let arch = env::var("CARGO_CFG_TARGET_ARCH").expect("CARGO_CFG_TARGET_ARCH was not set"); 12 | println!("cargo:rustc-check-cfg=cfg(folded_multiply)"); 13 | if arch.eq_ignore_ascii_case("x86_64") 14 | || arch.eq_ignore_ascii_case("aarch64") 15 | || arch.eq_ignore_ascii_case("mips64") 16 | || arch.eq_ignore_ascii_case("powerpc64") 17 | || arch.eq_ignore_ascii_case("riscv64gc") 18 | || arch.eq_ignore_ascii_case("s390x") 19 | { 20 | println!("cargo:rustc-cfg=folded_multiply"); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /compare/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ahash-compare" 3 | 
version = "0.0.1" 4 | authors = ["Tom Kaitchuck "] 5 | license = "MIT OR Apache-2.0" 6 | description = "A hash algorithm benchmark" 7 | documentation = "https://docs.rs/ahash" 8 | repository = "https://github.com/tkaitchuck/ahash" 9 | keywords = ["hash", "hashmap", "aes", "aes-ni", "no-std"] 10 | edition = "2018" 11 | readme = "README.md" 12 | 13 | [features] 14 | default = ["std"] 15 | std = ["ahash/std"] 16 | compile-time-rng = ["ahash/compile-time-rng"] 17 | 18 | [[bench]] 19 | name = "compare" 20 | path = "tests/compare.rs" 21 | harness = false 22 | 23 | [profile.bench] 24 | opt-level = 3 25 | debug = false 26 | lto = 'fat' 27 | debug-assertions = false 28 | codegen-units = 1 29 | 30 | [dependencies] 31 | ahash = { path = "../", default-features = false } 32 | pcg-mwc = "0.2.1" 33 | rand = "0.8.5" 34 | rand_core = "0.6.4" 35 | 36 | [dev-dependencies] 37 | criterion = "0.3.3" 38 | fnv = "1.0.7" 39 | fxhash = "0.2.1" 40 | farmhash = "1.1.5" 41 | highway = "1.1.0" 42 | metrohash = "1.0.6" 43 | siphasher = "1" 44 | t1ha = "0.1.0" 45 | wyhash = "0.5" 46 | xxhash-rust = {version = "0.8", features = ["xxh3"]} 47 | -------------------------------------------------------------------------------- /compare/Table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkaitchuck/aHash/5587f60d8ed16fb503267fed185156ed0f680918/compare/Table.png -------------------------------------------------------------------------------- /compare/readme.md: -------------------------------------------------------------------------------- 1 | # Comparison with other hashers 2 | 3 | [![Comparison chart](Table.png)](https://docs.google.com/spreadsheets/d/e/2PACX-1vSK7Li2nS-Bur9arAYF9IfT37MP-ohAe1v19lZu5fd9MajI1fSveLAQZyEie4Ea9k5-SWHTff7nL2DW/pubhtml?gid=0&single=true) 4 | 5 | ## Design 6 | 7 | AHash is designed *exclusively* for use in in-memory hashmaps. 
It does not have a fixed standard, but uses different 8 | algorithms depending on the availability of hardware instructions. aHash uses the [hardware AES instruction](https://en.wikipedia.org/wiki/AES_instruction_set) 9 | on X86 processors when it is available. If no specialized instructions are available, it falls back on an 10 | [algorithm based on multiplication](https://github.com/tkaitchuck/aHash/wiki/AHash-fallback-algorithm). 11 | 12 | Because aHash does not have a fixed standard for its output, it can optimize its performance to a much greater extent than 13 | algorithms which don't have this flexibility. This is great for Hashmaps but makes aHash inappropriate for applications where 14 | a hash needs to be sent over the network, or persisted. 15 | 16 | ## Quality 17 | 18 | **AHash passes the full [SMHasher test suite](https://github.com/rurban/smhasher)** both with and without AES support. 19 | The output of the tests is checked into the [smhasher subdirectory](../smhasher). 20 | 21 | At **over 50GB/s** aHash is the fastest algorithm to pass the full test suite by more than a factor of 2. 22 | Even the fallback algorithm is in the top 5 in terms of throughput, beating out many other algorithms that rely on SSE and AVX instructions. 23 | 24 | ## Speed 25 | 26 | aHash is the fastest non-trivial hasher implementation in Rust. Below is a comparison with 10 other popular hashing algorithms. 27 | 28 | ![Hasher performance](https://docs.google.com/spreadsheets/d/e/2PACX-1vSK7Li2nS-Bur9arAYF9IfT37MP-ohAe1v19lZu5fd9MajI1fSveLAQZyEie4Ea9k5-SWHTff7nL2DW/pubchart?oid=1323618938&format=image) 29 | 30 | ## DOS resistance 31 | 32 | AHash provides DOS resistance by incorporating random keys into the hash. There is a full explanation [here](https://github.com/tkaitchuck/aHash/wiki/How-aHash-is-resists-DOS-attacks). 33 | 34 | If the `std` feature flag is enabled (On by default) it uses the `getrandom` crate to generate random seeds upon initialization.
35 | If `std` is disabled and the `compile-time-rng` flag is enabled instead it will use the `const-random` crate to generate random seeds 36 | at compile time and embed them into the application binary. 37 | 38 | If neither the `std` nor the `compile-time-rng` flag is enabled, aHash will fall back on using the numeric value of memory addresses as a source of randomness. 39 | This is somewhat strong if ASLR is turned on (it is by default in Rust) but for some platforms this is not available. 40 | As a result this should not be relied on. For this reason it is strongly recommended that, if you disable `std` because your program needs to run in a 41 | `no-std` environment, you enable the `compile-time-rng` feature flag. 42 | 43 | 44 | # Why use aHash over X 45 | 46 | ## SipHash 47 | 48 | For a hashmap: Because aHash is nearly **10x** faster. 49 | 50 | SipHash is however useful in other contexts, such as for an HMAC, where aHash would be completely inappropriate. 51 | 52 | *SipHash-2-4* is designed to provide DOS attack resistance, and has no presently known attacks 53 | against this claim that don't involve learning bits of the key. 54 | 55 | SipHash is also available in the "1-3" variant which is about twice as fast as the standard version. 56 | The SipHash authors don't recommend using this variation when DOS attacks are a concern, but there are still no known 57 | practical DOS attacks against the algorithm. Rust has opted for the "1-3" version as the default in `std::collections::HashMap`, 58 | because the speed trade off of "2-4" was not worth it. 59 | 60 | As you can see in the graph above, aHash is **much** faster than even *SipHash-1-3*, and also provides DOS resistance. 61 | 62 | ## FxHash 63 | 64 | In terms of performance, aHash is faster than FxHash for strings and byte arrays but not primitives. 65 | So it might seem like using FxHash for hashmaps when the key is a primitive is a good idea. This is *not* the case.
66 | 67 | When FX hash is operating on a 4 or 8 byte input such as a u32 or a u64, it reduces to multiplying the input by a fixed 68 | constant. This is a bad hashing algorithm because it means that lower bits can never be influenced by any higher bit. In 69 | the context of a hashmap where the low order bits are used to determine which bucket to put an item in, this isn't 70 | any better than the identity function. Any keys that happen to end in the same bit pattern will all collide. 71 | Some examples of where this is likely to occur are: 72 | 73 | * Strings encoded in base64 74 | * Null terminated strings (when working with C code) 75 | * Integers that have the lower bits as zeros. (IE any multiple of small power of 2, which isn't a rare pattern in computer programs.) 76 | * For example when taking lengths of data or locations in data it is common for values to 77 | have a multiple of 1024, if these were used as keys in a map they will collide and end up in the same bucket. 78 | 79 | Like any non-keyed hash FxHash can be attacked. But FxHash is so prone to this that you may find yourself doing it accidentally. 80 | 81 | For example, it is possible to [accidentally introduce quadratic behavior by reading from one map in iteration order and writing to another.](https://accidentallyquadratic.tumblr.com/post/153545455987/rust-hash-iteration-reinsertion) 82 | 83 | Fxhash flaws make sense when you understand it for what it is. It is a quick and dirty hash, nothing more. 84 | it was not published and promoted by its creator, it was **found**! 85 | 86 | Because it is error-prone, FxHash should never be used as a default. In specialized instances where the keys are understood 87 | it makes sense, but given that aHash is faster on almost any object, it's probably not worth it. 88 | 89 | ## FnvHash 90 | 91 | FnvHash is also a poor default. It only handles one byte at a time, so its performance really suffers with large inputs. 
92 | It is also non-keyed so it is still subject to DOS attacks and [accidentally quadratic behavior.](https://accidentallyquadratic.tumblr.com/post/153545455987/rust-hash-iteration-reinsertion) 93 | 94 | ## MurmurHash, CityHash, MetroHash, FarmHash, and HighwayHash 95 | 96 | Murmur, City, Metro, Farm and Highway are all related, and appear to directly replace one another. 97 | 98 | They are all fine hashing algorithms, they do a good job of scrambling data, but they are all targeted at a different 99 | use case. They are intended to work in distributed systems where the hash is expected to be the same over time and from one 100 | computer to the next, efficiently hashing large volumes of data. 101 | 102 | This is quite different from the needs of a Hasher used in a hashmap. In a map the typical value is under 10 bytes. None 103 | of these algorithms scale down to handle data that small in a competitive time. What's more, the restriction that they 104 | provide consistent output prevents them from taking advantage of different hardware capabilities on different CPUs. It makes 105 | sense for a hashmap to work differently on a phone than on a server, or in WASM. 106 | 107 | If you need to persist or transmit a hash of a file, then using one of these is probably a good idea. 108 | HighwayHash seems to be the preferred solution as it offers high throughput for large objects and is DOS resistant. 109 | 110 | ## t1ha and XXHash 111 | Like aHash, t1ha and XXHash are targeted at hashmaps and use hardware instructions including AES for different platforms rather than having a single standard. 112 | Both are fast, but AHash is faster than either one, both with and without AES. This is particularly true of smaller inputs such as integers. 113 | T1ha's hashes do not pass the full SMHasher test suite. 114 | 115 | Neither XXHash nor T1ha explicitly claim DOS resistance, but both are keyed hashes, and do not have any obvious way to force collisions.
116 | As of this writing there doesn't appear to be a maintained crate implementing the latest version of t1ha. 117 | 118 | ## wyHash 119 | Similarly, wyHash is targeted at hashmaps. WyHash is quite fast, but is not DOS resistant. 120 | 121 | There are fixed strings which when encountered caused the internal state to reset. This makes wyHash trivial to attack. 122 | 123 | AHash outperforms wyHash across all input sizes, regardless of which CPU instructions are available. 124 | -------------------------------------------------------------------------------- /compare/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::io::Error; 2 | use std::fs::File; 3 | use std::io::Write; 4 | use pcg_mwc::Mwc256XXA64; 5 | use ahash::RandomState; 6 | use std::io::BufWriter; 7 | use std::path::Path; 8 | use rand_core::SeedableRng; 9 | use rand::Rng; 10 | use std::time::Instant; 11 | 12 | 13 | fn main() -> Result<(), Error> { 14 | let mut r = Mwc256XXA64::seed_from_u64(0xe786_c22b_119c_1479); 15 | 16 | let path = Path::new("hash_output"); 17 | 18 | let mut file = BufWriter::new(File::create(path)?); 19 | let hasher = RandomState::with_seeds(r.gen(), r.gen(), r.gen(), r.gen()); 20 | let start = Instant::now(); 21 | let mut sum: u64 = 0; 22 | for i in 0..i32::MAX { 23 | let value = hasher.hash_one(i as u64); 24 | sum = sum.wrapping_add(value); 25 | let value: [u8; 8] = value.to_ne_bytes(); 26 | file.write_all(&value)?; 27 | } 28 | let elapsed = start.elapsed(); 29 | println!("Sum {} Elapsed time: {}", sum, elapsed.as_millis()); 30 | file.flush()?; 31 | Ok(()) 32 | } -------------------------------------------------------------------------------- /compare/tests/compare.rs: -------------------------------------------------------------------------------- 1 | use ahash::RandomState; 2 | use criterion::*; 3 | use farmhash::FarmHasher; 4 | use fnv::FnvBuildHasher; 5 | use fxhash::FxBuildHasher; 6 | use std::hash::{BuildHasher, 
BuildHasherDefault, Hash, Hasher}; 7 | use xxhash_rust::xxh3::Xxh3Builder; 8 | 9 | fn ahash(k: &K, builder: &RandomState) -> u64 { 10 | let mut hasher = builder.build_hasher(); 11 | k.hash(&mut hasher); 12 | hasher.finish() 13 | } 14 | 15 | fn generic_hash(key: &K, builder: &B) -> u64 { 16 | let mut hasher = builder.build_hasher(); 17 | key.hash(&mut hasher); 18 | hasher.finish() 19 | } 20 | 21 | fn create_string(len: usize) -> String { 22 | let mut string = String::default(); 23 | for pos in 1..=len { 24 | let c = (48 + (pos % 10) as u8) as char; 25 | string.push(c); 26 | } 27 | string 28 | } 29 | 30 | fn compare_ahash(c: &mut Criterion) { 31 | let builder = RandomState::new(); 32 | let test = "compare_ahash"; 33 | for num in &[1, 3, 7, 15, 31, 63, 127, 255, 511, 1023] { 34 | let name = "string".to_owned() + &num.to_string(); 35 | let string = create_string(*num); 36 | c.bench_with_input(BenchmarkId::new(test, &name), &string, |bencher, s| { 37 | bencher.iter(|| black_box(ahash(s, &builder))); 38 | }); 39 | } 40 | } 41 | 42 | fn compare_other(c: &mut Criterion, test: &str, builder: B) { 43 | for num in &[1, 3, 7, 15, 31, 63, 127, 255, 511, 1023] { 44 | let name = "string".to_owned() + &num.to_string(); 45 | let string = create_string(*num); 46 | c.bench_with_input(BenchmarkId::new(test, &name), &string, |bencher, s| { 47 | bencher.iter(|| black_box(generic_hash(&s, &builder))); 48 | }); 49 | } 50 | } 51 | 52 | fn compare_farmhash(c: &mut Criterion) { 53 | let int: u64 = 1234; 54 | let string = create_string(1024); 55 | let builder = BuildHasherDefault::::default(); 56 | compare_other(c, "compare_farmhash", builder) 57 | } 58 | 59 | fn compare_fnvhash(c: &mut Criterion) { 60 | let int: u64 = 1234; 61 | let string = create_string(1024); 62 | let builder = FnvBuildHasher::default(); 63 | compare_other(c, "compare_fnvhash", builder) 64 | } 65 | 66 | fn compare_fxhash(c: &mut Criterion) { 67 | let int: u64 = 1234; 68 | let string = create_string(1024); 69 | let builder 
= FxBuildHasher::default(); 70 | compare_other(c, "compare_fxhash", builder) 71 | } 72 | 73 | fn compare_highway(c: &mut Criterion) { 74 | let int: u64 = 1234; 75 | let string = create_string(1024); 76 | let builder = highway::HighwayBuildHasher::default(); 77 | compare_other(c, "compare_highway", builder) 78 | } 79 | 80 | fn compare_metro(c: &mut Criterion) { 81 | let int: u64 = 1234; 82 | let string = create_string(1024); 83 | let builder = metrohash::MetroBuildHasher::default(); 84 | compare_other(c, "compare_metro", builder) 85 | } 86 | 87 | fn compare_t1ha(c: &mut Criterion) { 88 | let int: u64 = 1234; 89 | let string = create_string(1024); 90 | let builder = t1ha::T1haBuildHasher::default(); 91 | compare_other(c, "compare_t1ha", builder) 92 | } 93 | 94 | fn compare_sip13(c: &mut Criterion) { 95 | let int: u64 = 1234; 96 | let string = create_string(1024); 97 | let builder = BuildHasherDefault::::default(); 98 | compare_other(c, "compare_sip13", builder) 99 | } 100 | 101 | fn compare_sip24(c: &mut Criterion) { 102 | let int: u64 = 1234; 103 | let string = create_string(1024); 104 | let builder = BuildHasherDefault::::default(); 105 | compare_other(c, "compare_sip24", builder) 106 | } 107 | 108 | fn compare_wyhash(c: &mut Criterion) { 109 | let int: u64 = 1234; 110 | let string = create_string(1024); 111 | let builder = BuildHasherDefault::::default(); 112 | compare_other(c, "compare_wyhash", builder) 113 | } 114 | 115 | fn compare_xxhash(c: &mut Criterion) { 116 | let int: u64 = 1234; 117 | let string = create_string(1024); 118 | let builder = Xxh3Builder::default(); 119 | compare_other(c, "compare_xxhash", builder) 120 | } 121 | 122 | criterion_main!(compare); 123 | criterion_group!( 124 | compare, 125 | compare_ahash, 126 | compare_farmhash, 127 | compare_fnvhash, 128 | compare_fxhash, 129 | compare_highway, 130 | compare_metro, 131 | compare_t1ha, 132 | compare_sip13, 133 | compare_sip24, 134 | compare_wyhash, 135 | compare_xxhash, 136 | ); 137 | 
-------------------------------------------------------------------------------- /no_std_test/Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | 3 | [package] 4 | name = "no_std_test" 5 | version = "0.1.0" 6 | edition = "2018" 7 | authors = ["Stephen Chung"] 8 | description = "no-std test application" 9 | 10 | [dependencies] 11 | ahash = { path = "../", default_features = false } 12 | wee_alloc = { version = "0.4.5", default_features = false } 13 | 14 | [profile.dev] 15 | panic = "abort" 16 | 17 | [profile.release] 18 | opt-level = "z" # optimize for size 19 | debug = false 20 | rpath = false 21 | debug-assertions = false 22 | codegen-units = 1 23 | panic = "abort" 24 | 25 | [profile.unix] 26 | inherits = "release" 27 | lto = true 28 | 29 | [profile.windows] 30 | inherits = "release" 31 | 32 | [profile.macos] 33 | inherits = "release" 34 | lto = "fat" 35 | -------------------------------------------------------------------------------- /no_std_test/src/main.rs: -------------------------------------------------------------------------------- 1 | //! This is a bare-bones `no-std` application that hashes a value and 2 | //! uses the hash value as the return value. 3 | #![no_main] 4 | #![no_std] 5 | #![feature(alloc_error_handler, core_intrinsics, lang_items)] 6 | 7 | #[global_allocator] 8 | static ALLOC: wee_alloc::WeeAlloc = wee_alloc::WeeAlloc::INIT; 9 | 10 | use core::hash::{Hash, Hasher}; 11 | 12 | // NB: Rust needs a CRT runtime on Windows MSVC. 13 | #[cfg(all(windows, target_env = "msvc"))] 14 | #[link(name = "msvcrt")] 15 | #[link(name = "libcmt")] 16 | extern "C" {} 17 | 18 | #[no_mangle] 19 | fn main(_argc: isize, _argv: *const *const u8) -> isize { 20 | let mut h: ahash::AHasher = Default::default(); 21 | 42_i32.hash(&mut h); 22 | return h.finish() as isize; 23 | } 24 | 25 | 26 | #[alloc_error_handler] 27 | fn foo(_: core::alloc::Layout) -> ! 
{ 28 | core::intrinsics::abort(); 29 | } 30 | 31 | #[panic_handler] 32 | #[lang = "panic_impl"] 33 | fn rust_begin_panic(_: &core::panic::PanicInfo) -> ! { 34 | core::intrinsics::abort(); 35 | } 36 | 37 | #[no_mangle] 38 | extern "C" fn _rust_eh_personality() {} 39 | 40 | #[no_mangle] 41 | extern "C" fn rust_eh_personality() {} 42 | 43 | #[no_mangle] 44 | extern "C" fn rust_eh_register_frames() {} 45 | 46 | #[no_mangle] 47 | extern "C" fn rust_eh_unregister_frames() {} 48 | 49 | #[no_mangle] 50 | extern "C" fn _Unwind_Resume() {} 51 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 120 2 | -------------------------------------------------------------------------------- /smhasher/ahash-cbindings/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ahash-cbindings" 3 | version = "0.1.2" 4 | authors = ["Tom Kaitchuck "] 5 | edition = "2018" 6 | description = "C bindings for aHash so that it can be invoked by SMHasher to verify quality." 
7 | 8 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 9 | [lib] 10 | name = "ahash_c" 11 | crate-type = ["staticlib"] # Creates static lib 12 | 13 | [profile.release] 14 | opt-level = 3 15 | debug = false 16 | lto = 'fat' 17 | debug-assertions = false 18 | 19 | [dependencies] 20 | ahash = { path = "../../", default-features = false } -------------------------------------------------------------------------------- /smhasher/ahash-cbindings/install.sh: -------------------------------------------------------------------------------- 1 | RUSTFLAGS="-C opt-level=3 -C target-cpu=native -C codegen-units=1" cargo build --release && sudo cp target/release/libahash_c.a /usr/local/lib/ 2 | -------------------------------------------------------------------------------- /smhasher/ahash-cbindings/src/lib.rs: -------------------------------------------------------------------------------- 1 | use ahash::*; 2 | use core::slice; 3 | use std::hash::{BuildHasher}; 4 | 5 | /// Hashes the `len` bytes starting at `buf` with aHash, using `seed` for all four seed values. 6 | /// 7 | /// A null `buf` or a zero `len` is hashed as the empty byte string. 8 | #[no_mangle] 9 | pub extern "C" fn ahash64(buf: *const (), len: usize, seed: u64) -> u64 { 10 | let buf: &[u8] = if buf.is_null() || len == 0 { 11 | // `slice::from_raw_parts` requires a non-null pointer even for zero-length slices. 12 | &[] 13 | } else { 14 | // SAFETY: `buf` is non-null here; the C caller must guarantee it points to `len` readable bytes. 15 | unsafe { slice::from_raw_parts(buf as *const u8, len) } 16 | }; 17 | let build_hasher = RandomState::with_seeds(seed, seed, seed, seed); 18 | build_hasher.hash_one(&buf) 19 | } 20 | -------------------------------------------------------------------------------- /smhasher/clone_smhasher.sh: -------------------------------------------------------------------------------- 1 | git clone https://github.com/rurban/smhasher.git 2 | -------------------------------------------------------------------------------- /src/aes_hash.rs: -------------------------------------------------------------------------------- 1 | use crate::convert::*; 2 | use crate::operations::*; 3 | use crate::random_state::PI; 4 | use crate::RandomState; 5 | use core::hash::Hasher; 6 | 7 | /// A `Hasher` for hashing an arbitrary stream of bytes.
8 | /// 9 | /// Instances of [`AHasher`] represent state that is updated while hashing data. 10 | /// 11 | /// Each method updates the internal state based on the new data provided. Once 12 | /// all of the data has been provided, the resulting hash can be obtained by calling 13 | /// `finish()` 14 | /// 15 | /// [Clone] is also provided in case you wish to calculate hashes for two different items that 16 | /// start with the same data. 17 | /// 18 | #[derive(Debug, Clone)] 19 | pub struct AHasher { 20 | enc: u128, 21 | sum: u128, 22 | key: u128, 23 | } 24 | 25 | impl AHasher { 26 | /// Creates a new hasher keyed to the provided keys. 27 | /// 28 | /// Normally hashers are created via `AHasher::default()` for fixed keys or `RandomState::new()` for randomly 29 | /// generated keys and `RandomState::with_seeds(a, b, c, d)` for seeds that are set and can be reused. All of these work at 30 | /// map creation time (and hence don't have any overhead on a per-item basis). 31 | /// 32 | /// This method directly creates the hasher instance; the only transformation applied to the provided keys is an 33 | /// XOR with the `PI` constants. This may be useful where a `BuildHasher` is not desired, such as for testing purposes.
34 | /// 35 | /// # Example 36 | /// 37 | /// ```no_build 38 | /// use std::hash::Hasher; 39 | /// use ahash::AHasher; 40 | /// 41 | /// let mut hasher = AHasher::new_with_keys(1234, 5678); 42 | /// 43 | /// hasher.write_u32(1989); 44 | /// hasher.write_u8(11); 45 | /// hasher.write_u8(9); 46 | /// hasher.write(b"Huh?"); 47 | /// 48 | /// println!("Hash is {:x}!", hasher.finish()); 49 | /// ``` 50 | #[inline] 51 | pub(crate) fn new_with_keys(key1: u128, key2: u128) -> Self { 52 | let pi: [u128; 2] = PI.convert(); 53 | let key1 = key1 ^ pi[0]; 54 | let key2 = key2 ^ pi[1]; 55 | Self { 56 | enc: key1, 57 | sum: key2, 58 | key: key1 ^ key2, 59 | } 60 | } 61 | 62 | #[allow(unused)] // False positive 63 | pub(crate) fn test_with_keys(key1: u128, key2: u128) -> Self { 64 | Self { 65 | enc: key1, 66 | sum: key2, 67 | key: key1 ^ key2, 68 | } 69 | } 70 | 71 | #[inline] 72 | pub(crate) fn from_random_state(rand_state: &RandomState) -> Self { 73 | let key1 = [rand_state.k0, rand_state.k1].convert(); 74 | let key2 = [rand_state.k2, rand_state.k3].convert(); 75 | Self { 76 | enc: key1, 77 | sum: key2, 78 | key: key1 ^ key2, 79 | } 80 | } 81 | 82 | #[inline(always)] 83 | fn hash_in(&mut self, new_value: u128) { 84 | self.enc = aesdec(self.enc, new_value); 85 | self.sum = shuffle_and_add(self.sum, new_value); 86 | } 87 | 88 | #[inline(always)] 89 | fn hash_in_2(&mut self, v1: u128, v2: u128) { 90 | self.enc = aesdec(self.enc, v1); 91 | self.sum = shuffle_and_add(self.sum, v1); 92 | self.enc = aesdec(self.enc, v2); 93 | self.sum = shuffle_and_add(self.sum, v2); 94 | } 95 | 96 | #[inline] 97 | #[cfg(specialize)] 98 | fn short_finish(&self) -> u64 { 99 | let combined = aesenc(self.sum, self.enc); 100 | let result: [u64; 2] = aesdec(combined, combined).convert(); 101 | result[0] 102 | } 103 | } 104 | 105 | /// Provides [Hasher] methods to hash all of the primitive types. 
106 | /// 107 | /// [Hasher]: core::hash::Hasher 108 | impl Hasher for AHasher { 109 | #[inline] 110 | fn write_u8(&mut self, i: u8) { 111 | self.write_u64(i as u64); 112 | } 113 | 114 | #[inline] 115 | fn write_u16(&mut self, i: u16) { 116 | self.write_u64(i as u64); 117 | } 118 | 119 | #[inline] 120 | fn write_u32(&mut self, i: u32) { 121 | self.write_u64(i as u64); 122 | } 123 | 124 | #[inline] 125 | fn write_u128(&mut self, i: u128) { 126 | self.hash_in(i); 127 | } 128 | 129 | #[inline] 130 | #[cfg(any( 131 | target_pointer_width = "64", 132 | target_pointer_width = "32", 133 | target_pointer_width = "16" 134 | ))] 135 | fn write_usize(&mut self, i: usize) { 136 | self.write_u64(i as u64); 137 | } 138 | 139 | #[inline] 140 | #[cfg(target_pointer_width = "128")] 141 | fn write_usize(&mut self, i: usize) { 142 | self.write_u128(i as u128); 143 | } 144 | 145 | #[inline] 146 | fn write_u64(&mut self, i: u64) { 147 | self.write_u128(i as u128); 148 | } 149 | 150 | #[inline] 151 | #[allow(clippy::collapsible_if)] 152 | fn write(&mut self, input: &[u8]) { 153 | let mut data = input; 154 | let length = data.len(); 155 | add_in_length(&mut self.enc, length as u64); 156 | 157 | //A 'binary search' on sizes reduces the number of comparisons. 
158 | if data.len() <= 8 { 159 | let value = read_small(data); 160 | self.hash_in(value.convert()); 161 | } else { 162 | if data.len() > 32 { 163 | if data.len() > 64 { 164 | let tail = data.read_last_u128x4(); 165 | let mut current: [u128; 4] = [self.key; 4]; 166 | current[0] = aesenc(current[0], tail[0]); 167 | current[1] = aesdec(current[1], tail[1]); 168 | current[2] = aesenc(current[2], tail[2]); 169 | current[3] = aesdec(current[3], tail[3]); 170 | let mut sum: [u128; 2] = [self.key, !self.key]; 171 | sum[0] = add_by_64s(sum[0].convert(), tail[0].convert()).convert(); 172 | sum[1] = add_by_64s(sum[1].convert(), tail[1].convert()).convert(); 173 | sum[0] = shuffle_and_add(sum[0], tail[2]); 174 | sum[1] = shuffle_and_add(sum[1], tail[3]); 175 | while data.len() > 64 { 176 | let (blocks, rest) = data.read_u128x4(); 177 | current[0] = aesdec(current[0], blocks[0]); 178 | current[1] = aesdec(current[1], blocks[1]); 179 | current[2] = aesdec(current[2], blocks[2]); 180 | current[3] = aesdec(current[3], blocks[3]); 181 | sum[0] = shuffle_and_add(sum[0], blocks[0]); 182 | sum[1] = shuffle_and_add(sum[1], blocks[1]); 183 | sum[0] = shuffle_and_add(sum[0], blocks[2]); 184 | sum[1] = shuffle_and_add(sum[1], blocks[3]); 185 | data = rest; 186 | } 187 | self.hash_in_2(current[0], current[1]); 188 | self.hash_in_2(current[2], current[3]); 189 | self.hash_in_2(sum[0], sum[1]); 190 | } else { 191 | //len 33-64 192 | let (head, _) = data.read_u128x2(); 193 | let tail = data.read_last_u128x2(); 194 | self.hash_in_2(head[0], head[1]); 195 | self.hash_in_2(tail[0], tail[1]); 196 | } 197 | } else { 198 | if data.len() > 16 { 199 | //len 17-32 200 | self.hash_in_2(data.read_u128().0, data.read_last_u128()); 201 | } else { 202 | //len 9-16 203 | let value: [u64; 2] = [data.read_u64().0, data.read_last_u64()]; 204 | self.hash_in(value.convert()); 205 | } 206 | } 207 | } 208 | } 209 | #[inline] 210 | fn finish(&self) -> u64 { 211 | let combined = aesenc(self.sum, self.enc); 212 | let 
result: [u64; 2] = aesdec(aesdec(combined, self.key), combined).convert(); 213 | result[0] 214 | } 215 | } 216 | 217 | #[cfg(specialize)] 218 | pub(crate) struct AHasherU64 { 219 | pub(crate) buffer: u64, 220 | pub(crate) pad: u64, 221 | } 222 | 223 | /// A specialized hasher for only primitives under 64 bits. 224 | #[cfg(specialize)] 225 | impl Hasher for AHasherU64 { 226 | #[inline] 227 | fn finish(&self) -> u64 { 228 | folded_multiply(self.buffer, self.pad) 229 | } 230 | 231 | #[inline] 232 | fn write(&mut self, _bytes: &[u8]) { 233 | unreachable!("Specialized hasher was called with a different type of object") 234 | } 235 | 236 | #[inline] 237 | fn write_u8(&mut self, i: u8) { 238 | self.write_u64(i as u64); 239 | } 240 | 241 | #[inline] 242 | fn write_u16(&mut self, i: u16) { 243 | self.write_u64(i as u64); 244 | } 245 | 246 | #[inline] 247 | fn write_u32(&mut self, i: u32) { 248 | self.write_u64(i as u64); 249 | } 250 | 251 | #[inline] 252 | fn write_u64(&mut self, i: u64) { 253 | self.buffer = folded_multiply(i ^ self.buffer, MULTIPLE); 254 | } 255 | 256 | #[inline] 257 | fn write_u128(&mut self, _i: u128) { 258 | unreachable!("Specialized hasher was called with a different type of object") 259 | } 260 | 261 | #[inline] 262 | fn write_usize(&mut self, _i: usize) { 263 | unreachable!("Specialized hasher was called with a different type of object") 264 | } 265 | } 266 | 267 | #[cfg(specialize)] 268 | pub(crate) struct AHasherFixed(pub AHasher); 269 | 270 | /// A specialized hasher for fixed size primitives larger than 64 bits. 
271 | #[cfg(specialize)] 272 | impl Hasher for AHasherFixed { 273 | #[inline] 274 | fn finish(&self) -> u64 { 275 | self.0.short_finish() 276 | } 277 | 278 | #[inline] 279 | fn write(&mut self, bytes: &[u8]) { 280 | self.0.write(bytes) 281 | } 282 | 283 | #[inline] 284 | fn write_u8(&mut self, i: u8) { 285 | self.write_u64(i as u64); 286 | } 287 | 288 | #[inline] 289 | fn write_u16(&mut self, i: u16) { 290 | self.write_u64(i as u64); 291 | } 292 | 293 | #[inline] 294 | fn write_u32(&mut self, i: u32) { 295 | self.write_u64(i as u64); 296 | } 297 | 298 | #[inline] 299 | fn write_u64(&mut self, i: u64) { 300 | self.0.write_u64(i); 301 | } 302 | 303 | #[inline] 304 | fn write_u128(&mut self, i: u128) { 305 | self.0.write_u128(i); 306 | } 307 | 308 | #[inline] 309 | fn write_usize(&mut self, i: usize) { 310 | self.0.write_usize(i); 311 | } 312 | } 313 | 314 | #[cfg(specialize)] 315 | pub(crate) struct AHasherStr(pub AHasher); 316 | 317 | /// A specialized hasher for strings 318 | /// Note that the other types don't panic because the hash impl for String tacks on an unneeded call. 
(As does vec) 319 | #[cfg(specialize)] 320 | impl Hasher for AHasherStr { 321 | #[inline] 322 | fn finish(&self) -> u64 { 323 | let result: [u64; 2] = self.0.enc.convert(); 324 | result[0] 325 | } 326 | 327 | #[inline] 328 | fn write(&mut self, bytes: &[u8]) { 329 | if bytes.len() > 8 { 330 | self.0.write(bytes); 331 | self.0.enc = aesenc(self.0.sum, self.0.enc); 332 | self.0.enc = aesdec(aesdec(self.0.enc, self.0.key), self.0.enc); 333 | } else { 334 | add_in_length(&mut self.0.enc, bytes.len() as u64); 335 | 336 | let value = read_small(bytes).convert(); 337 | self.0.sum = shuffle_and_add(self.0.sum, value); 338 | self.0.enc = aesenc(self.0.sum, self.0.enc); 339 | self.0.enc = aesdec(aesdec(self.0.enc, self.0.key), self.0.enc); 340 | } 341 | } 342 | 343 | #[inline] 344 | fn write_u8(&mut self, _i: u8) {} 345 | 346 | #[inline] 347 | fn write_u16(&mut self, _i: u16) {} 348 | 349 | #[inline] 350 | fn write_u32(&mut self, _i: u32) {} 351 | 352 | #[inline] 353 | fn write_u64(&mut self, _i: u64) {} 354 | 355 | #[inline] 356 | fn write_u128(&mut self, _i: u128) {} 357 | 358 | #[inline] 359 | fn write_usize(&mut self, _i: usize) {} 360 | } 361 | 362 | #[cfg(test)] 363 | mod tests { 364 | use super::*; 365 | use crate::convert::Convert; 366 | use crate::operations::aesenc; 367 | use crate::RandomState; 368 | use std::hash::{BuildHasher, Hasher}; 369 | #[test] 370 | fn test_sanity() { 371 | let mut hasher = RandomState::with_seeds(1, 2, 3, 4).build_hasher(); 372 | hasher.write_u64(0); 373 | let h1 = hasher.finish(); 374 | hasher.write(&[1, 0, 0, 0, 0, 0, 0, 0]); 375 | let h2 = hasher.finish(); 376 | assert_ne!(h1, h2); 377 | } 378 | 379 | #[cfg(feature = "compile-time-rng")] 380 | #[test] 381 | fn test_builder() { 382 | use std::collections::HashMap; 383 | use std::hash::BuildHasherDefault; 384 | 385 | let mut map = HashMap::<u32, u64, BuildHasherDefault<AHasher>>::default(); 386 | map.insert(1, 3); 387 | } 388 | 389 | #[cfg(feature = "compile-time-rng")] 390 | #[test] 391 | fn test_default() { 392 | let
hasher_a = AHasher::default(); 393 | let a_enc: [u64; 2] = hasher_a.enc.convert(); 394 | let a_sum: [u64; 2] = hasher_a.sum.convert(); 395 | assert_ne!(0, a_enc[0]); 396 | assert_ne!(0, a_enc[1]); 397 | assert_ne!(0, a_sum[0]); 398 | assert_ne!(0, a_sum[1]); 399 | assert_ne!(a_enc[0], a_enc[1]); 400 | assert_ne!(a_sum[0], a_sum[1]); 401 | assert_ne!(a_enc[0], a_sum[0]); 402 | assert_ne!(a_enc[1], a_sum[1]); 403 | let hasher_b = AHasher::default(); 404 | let b_enc: [u64; 2] = hasher_b.enc.convert(); 405 | let b_sum: [u64; 2] = hasher_b.sum.convert(); 406 | assert_eq!(a_enc[0], b_enc[0]); 407 | assert_eq!(a_enc[1], b_enc[1]); 408 | assert_eq!(a_sum[0], b_sum[0]); 409 | assert_eq!(a_sum[1], b_sum[1]); 410 | } 411 | 412 | #[test] 413 | fn test_hash() { 414 | let mut result: [u64; 2] = [0x6c62272e07bb0142, 0x62b821756295c58d]; 415 | let value: [u64; 2] = [1 << 32, 0xFEDCBA9876543210]; 416 | result = aesenc(value.convert(), result.convert()).convert(); 417 | result = aesenc(result.convert(), result.convert()).convert(); 418 | let mut result2: [u64; 2] = [0x6c62272e07bb0142, 0x62b821756295c58d]; 419 | let value2: [u64; 2] = [1, 0xFEDCBA9876543210]; 420 | result2 = aesenc(value2.convert(), result2.convert()).convert(); 421 | result2 = aesenc(result2.convert(), result.convert()).convert(); 422 | let result: [u8; 16] = result.convert(); 423 | let result2: [u8; 16] = result2.convert(); 424 | assert_ne!(hex::encode(result), hex::encode(result2)); 425 | } 426 | } 427 | -------------------------------------------------------------------------------- /src/convert.rs: -------------------------------------------------------------------------------- 1 | pub(crate) trait Convert<To> { 2 | fn convert(self) -> To; 3 | } 4 | 5 | macro_rules!
convert { 6 | ($a:ty, $b:ty) => { 7 | impl Convert<$b> for $a { 8 | #[inline(always)] 9 | fn convert(self) -> $b { 10 | zerocopy::transmute!(self) 11 | } 12 | } 13 | impl Convert<$a> for $b { 14 | #[inline(always)] 15 | fn convert(self) -> $a { 16 | zerocopy::transmute!(self) 17 | } 18 | } 19 | }; 20 | } 21 | 22 | macro_rules! convert_primitive_bytes { 23 | ($a:ty, $b:ty) => { 24 | impl Convert<$b> for $a { 25 | #[inline(always)] 26 | fn convert(self) -> $b { 27 | self.to_ne_bytes() 28 | } 29 | } 30 | impl Convert<$a> for $b { 31 | #[inline(always)] 32 | fn convert(self) -> $a { 33 | <$a>::from_ne_bytes(self) 34 | } 35 | } 36 | }; 37 | } 38 | 39 | convert!([u128; 4], [u8; 64]); 40 | convert!([u128; 2], [u64; 4]); 41 | convert!([u128; 2], [u8; 32]); 42 | convert!(u128, [u64; 2]); 43 | convert_primitive_bytes!(u128, [u8; 16]); 44 | #[cfg(test)] 45 | convert!([u64; 2], [u8; 16]); 46 | convert_primitive_bytes!(u64, [u8; 8]); 47 | convert_primitive_bytes!(u32, [u8; 4]); 48 | convert_primitive_bytes!(u16, [u8; 2]); 49 | convert!([[u64; 4]; 2], [u8; 64]); 50 | 51 | macro_rules! 
as_array { 52 | ($input:expr, $len:expr) => {{ 53 | { 54 | #[inline(always)] 55 | fn as_array<T>(slice: &[T]) -> &[T; $len] { 56 | core::convert::TryFrom::try_from(slice).unwrap() 57 | } 58 | as_array($input) 59 | } 60 | }}; 61 | } 62 | 63 | pub(crate) trait ReadFromSlice { 64 | fn read_u16(&self) -> (u16, &[u8]); 65 | fn read_u32(&self) -> (u32, &[u8]); 66 | fn read_u64(&self) -> (u64, &[u8]); 67 | fn read_u128(&self) -> (u128, &[u8]); 68 | fn read_u128x2(&self) -> ([u128; 2], &[u8]); 69 | fn read_u128x4(&self) -> ([u128; 4], &[u8]); 70 | fn read_last_u16(&self) -> u16; 71 | fn read_last_u32(&self) -> u32; 72 | fn read_last_u64(&self) -> u64; 73 | fn read_last_u128(&self) -> u128; 74 | fn read_last_u128x2(&self) -> [u128; 2]; 75 | fn read_last_u128x4(&self) -> [u128; 4]; 76 | } 77 | 78 | impl ReadFromSlice for [u8] { 79 | #[inline(always)] 80 | fn read_u16(&self) -> (u16, &[u8]) { 81 | let (value, rest) = self.split_at(2); 82 | (as_array!(value, 2).convert(), rest) 83 | } 84 | 85 | #[inline(always)] 86 | fn read_u32(&self) -> (u32, &[u8]) { 87 | let (value, rest) = self.split_at(4); 88 | (as_array!(value, 4).convert(), rest) 89 | } 90 | 91 | #[inline(always)] 92 | fn read_u64(&self) -> (u64, &[u8]) { 93 | let (value, rest) = self.split_at(8); 94 | (as_array!(value, 8).convert(), rest) 95 | } 96 | 97 | #[inline(always)] 98 | fn read_u128(&self) -> (u128, &[u8]) { 99 | let (value, rest) = self.split_at(16); 100 | (as_array!(value, 16).convert(), rest) 101 | } 102 | 103 | #[inline(always)] 104 | fn read_u128x2(&self) -> ([u128; 2], &[u8]) { 105 | let (value, rest) = self.split_at(32); 106 | (as_array!(value, 32).convert(), rest) 107 | } 108 | 109 | #[inline(always)] 110 | fn read_u128x4(&self) -> ([u128; 4], &[u8]) { 111 | let (value, rest) = self.split_at(64); 112 | (as_array!(value, 64).convert(), rest) 113 | } 114 | 115 | #[inline(always)] 116 | fn read_last_u16(&self) -> u16 { 117 | let (_, value) = self.split_at(self.len() - 2); 118 | as_array!(value, 2).convert()
119 | } 120 | 121 | #[inline(always)] 122 | fn read_last_u32(&self) -> u32 { 123 | let (_, value) = self.split_at(self.len() - 4); 124 | as_array!(value, 4).convert() 125 | } 126 | 127 | #[inline(always)] 128 | fn read_last_u64(&self) -> u64 { 129 | let (_, value) = self.split_at(self.len() - 8); 130 | as_array!(value, 8).convert() 131 | } 132 | 133 | #[inline(always)] 134 | fn read_last_u128(&self) -> u128 { 135 | let (_, value) = self.split_at(self.len() - 16); 136 | as_array!(value, 16).convert() 137 | } 138 | 139 | #[inline(always)] 140 | fn read_last_u128x2(&self) -> [u128; 2] { 141 | let (_, value) = self.split_at(self.len() - 32); 142 | as_array!(value, 32).convert() 143 | } 144 | 145 | #[inline(always)] 146 | fn read_last_u128x4(&self) -> [u128; 4] { 147 | let (_, value) = self.split_at(self.len() - 64); 148 | as_array!(value, 64).convert() 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /src/fallback_hash.rs: -------------------------------------------------------------------------------- 1 | use crate::convert::*; 2 | use crate::operations::folded_multiply; 3 | use crate::operations::read_small; 4 | use crate::operations::MULTIPLE; 5 | use crate::random_state::PI; 6 | use crate::RandomState; 7 | use core::hash::Hasher; 8 | 9 | const ROT: u32 = 23; //17 10 | 11 | /// A `Hasher` for hashing an arbitrary stream of bytes. 12 | /// 13 | /// Instances of [`AHasher`] represent state that is updated while hashing data. 14 | /// 15 | /// Each method updates the internal state based on the new data provided. Once 16 | /// all of the data has been provided, the resulting hash can be obtained by calling 17 | /// `finish()` 18 | /// 19 | /// [Clone] is also provided in case you wish to calculate hashes for two different items that 20 | /// start with the same data. 
21 | /// 22 | #[derive(Debug, Clone)] 23 | pub struct AHasher { 24 | buffer: u64, 25 | pad: u64, 26 | extra_keys: [u64; 2], 27 | } 28 | 29 | impl AHasher { 30 | /// Creates a new hasher keyed to the provided keys. 31 | #[inline] 32 | #[allow(dead_code)] // Is not called if non-fallback hash is used. 33 | pub(crate) fn new_with_keys(key1: u128, key2: u128) -> AHasher { 34 | let pi: [u128; 2] = PI.convert(); 35 | let key1: [u64; 2] = (key1 ^ pi[0]).convert(); 36 | let key2: [u64; 2] = (key2 ^ pi[1]).convert(); 37 | AHasher { 38 | buffer: key1[0], 39 | pad: key1[1], 40 | extra_keys: key2, 41 | } 42 | } 43 | 44 | #[allow(unused)] // False positive 45 | pub(crate) fn test_with_keys(key1: u128, key2: u128) -> Self { 46 | let key1: [u64; 2] = key1.convert(); 47 | let key2: [u64; 2] = key2.convert(); 48 | Self { 49 | buffer: key1[0], 50 | pad: key1[1], 51 | extra_keys: key2, 52 | } 53 | } 54 | 55 | #[inline] 56 | #[allow(dead_code)] // Is not called if non-fallback hash is used. 57 | pub(crate) fn from_random_state(rand_state: &RandomState) -> AHasher { 58 | AHasher { 59 | buffer: rand_state.k1, 60 | pad: rand_state.k0, 61 | extra_keys: [rand_state.k2, rand_state.k3], 62 | } 63 | } 64 | 65 | /// This update function has the goal of updating the buffer with a single multiply. 66 | /// FxHash does this but is vulnerable to attack. To avoid this, the input needs to be masked with an 67 | /// unpredictable value. Other hashes such as murmurhash have taken this approach but were found vulnerable 68 | /// to attack. The attack was based on the idea of reversing the pre-mixing (which is necessarily 69 | /// reversible otherwise bits would be lost) then placing a difference in the highest bit before the 70 | /// multiply used to mix the data. Because a multiply can never affect the bits to the right of it, a 71 | /// subsequent update that also differed in this bit could result in a predictable collision.
72 | /// 73 | /// This version avoids this vulnerability while still only using a single multiply. It takes advantage 74 | /// of the fact that when a 64 bit multiply is performed the upper 64 bits are usually computed and thrown 75 | /// away. Instead it creates two 128 bit values where the upper 64 bits are zeros and multiplies them. 76 | /// (The compiler is smart enough to turn this into a 64 bit multiplication in the assembly) 77 | /// Then the upper bits are xored with the lower bits to produce a single 64 bit result. 78 | /// 79 | /// To understand why this is a good scrambling function it helps to understand multiply-with-carry PRNGs: 80 | /// https://en.wikipedia.org/wiki/Multiply-with-carry_pseudorandom_number_generator 81 | /// If the multiple is chosen well, this creates a long period, decent quality PRNG. 82 | /// Notice that this function is equivalent to this except the `buffer`/`state` is being xored with each 83 | /// new block of data. In the event that data is all zeros, it is exactly equivalent to a MWC PRNG. 84 | /// 85 | /// This is impervious to attack because every bit of the buffer at the end is dependent on every bit in 86 | /// `new_data ^ buffer`. For example suppose two inputs differed in only the 5th bit. Then when the 87 | /// multiplication is performed the `result` will differ in bits 5-69. More specifically it will differ by 88 | /// 2^5 * MULTIPLE. However in the next step bits 65-128 are turned into a separate 64 bit value. So the 89 | /// differing bits will be in the lower 6 bits of this value. The two intermediate values that differ in 90 | /// bits 5-63 and in bits 0-5 respectively get added together, producing an output that differs in every 91 | /// bit. The addition carries in the multiplication and at the end additionally mean that even if an 92 | /// attacker somehow knew part of (but not all) the contents of the buffer beforehand, 93 | /// they would not be able to predict any of the bits in the buffer at the end.
94 | #[inline(always)] 95 | fn update(&mut self, new_data: u64) { 96 | self.buffer = folded_multiply(new_data ^ self.buffer, MULTIPLE); 97 | } 98 | 99 | /// Similar to the above, this function performs an update using a "folded multiply". 100 | /// However it takes in 128 bits of data instead of 64. Both halves must be masked. 101 | /// 102 | /// This makes it impossible for an attacker to place a single bit difference between 103 | /// two blocks so as to cancel each other. 104 | /// 105 | /// However this is not sufficient. To prevent (a,b) from hashing the same as (b,a) the buffer itself must 106 | /// be updated between calls in a way that does not commute. To achieve this, XOR and rotate are used. 107 | /// Add followed by xor is not the same as xor followed by add, and rotate ensures that the same output bits 108 | /// can't be changed by the same set of input bits. To cancel this sequence with subsequent input would require 109 | /// knowing the keys. 110 | #[inline(always)] 111 | fn large_update(&mut self, new_data: u128) { 112 | let block: [u64; 2] = new_data.convert(); 113 | let combined = folded_multiply(block[0] ^ self.extra_keys[0], block[1] ^ self.extra_keys[1]); 114 | self.buffer = (self.buffer.wrapping_add(self.pad) ^ combined).rotate_left(ROT); 115 | } 116 | 117 | #[inline] 118 | #[cfg(specialize)] 119 | fn short_finish(&self) -> u64 { 120 | folded_multiply(self.buffer, self.pad) 121 | } 122 | } 123 | 124 | /// Provides [Hasher] methods to hash all of the primitive types.
125 | /// 126 | /// [Hasher]: core::hash::Hasher 127 | impl Hasher for AHasher { 128 | #[inline] 129 | fn write_u8(&mut self, i: u8) { 130 | self.update(i as u64); 131 | } 132 | 133 | #[inline] 134 | fn write_u16(&mut self, i: u16) { 135 | self.update(i as u64); 136 | } 137 | 138 | #[inline] 139 | fn write_u32(&mut self, i: u32) { 140 | self.update(i as u64); 141 | } 142 | 143 | #[inline] 144 | fn write_u64(&mut self, i: u64) { 145 | self.update(i as u64); 146 | } 147 | 148 | #[inline] 149 | fn write_u128(&mut self, i: u128) { 150 | self.large_update(i); 151 | } 152 | 153 | #[inline] 154 | #[cfg(any( 155 | target_pointer_width = "64", 156 | target_pointer_width = "32", 157 | target_pointer_width = "16" 158 | ))] 159 | fn write_usize(&mut self, i: usize) { 160 | self.write_u64(i as u64); 161 | } 162 | 163 | #[inline] 164 | #[cfg(target_pointer_width = "128")] 165 | fn write_usize(&mut self, i: usize) { 166 | self.write_u128(i as u128); 167 | } 168 | 169 | #[inline] 170 | #[allow(clippy::collapsible_if)] 171 | fn write(&mut self, input: &[u8]) { 172 | let mut data = input; 173 | let length = data.len() as u64; 174 | //Needs to be an add rather than an xor because otherwise it could be canceled with carefully formed input. 175 | self.buffer = self.buffer.wrapping_add(length).wrapping_mul(MULTIPLE); 176 | //A 'binary search' on sizes reduces the number of comparisons. 
177 | if data.len() > 8 { 178 | if data.len() > 16 { 179 | let tail = data.read_last_u128(); 180 | self.large_update(tail); 181 | while data.len() > 16 { 182 | let (block, rest) = data.read_u128(); 183 | self.large_update(block); 184 | data = rest; 185 | } 186 | } else { 187 | self.large_update([data.read_u64().0, data.read_last_u64()].convert()); 188 | } 189 | } else { 190 | let value = read_small(data); 191 | self.large_update(value.convert()); 192 | } 193 | } 194 | 195 | #[inline] 196 | fn finish(&self) -> u64 { 197 | let rot = (self.buffer & 63) as u32; 198 | folded_multiply(self.buffer, self.pad).rotate_left(rot) 199 | } 200 | } 201 | 202 | #[cfg(specialize)] 203 | pub(crate) struct AHasherU64 { 204 | pub(crate) buffer: u64, 205 | pub(crate) pad: u64, 206 | } 207 | 208 | /// A specialized hasher for only primitives under 64 bits. 209 | #[cfg(specialize)] 210 | impl Hasher for AHasherU64 { 211 | #[inline] 212 | fn finish(&self) -> u64 { 213 | folded_multiply(self.buffer, self.pad) 214 | //self.buffer 215 | } 216 | 217 | #[inline] 218 | fn write(&mut self, _bytes: &[u8]) { 219 | unreachable!("Specialized hasher was called with a different type of object") 220 | } 221 | 222 | #[inline] 223 | fn write_u8(&mut self, i: u8) { 224 | self.write_u64(i as u64); 225 | } 226 | 227 | #[inline] 228 | fn write_u16(&mut self, i: u16) { 229 | self.write_u64(i as u64); 230 | } 231 | 232 | #[inline] 233 | fn write_u32(&mut self, i: u32) { 234 | self.write_u64(i as u64); 235 | } 236 | 237 | #[inline] 238 | fn write_u64(&mut self, i: u64) { 239 | self.buffer = folded_multiply(i ^ self.buffer, MULTIPLE); 240 | } 241 | 242 | #[inline] 243 | fn write_u128(&mut self, _i: u128) { 244 | unreachable!("Specialized hasher was called with a different type of object") 245 | } 246 | 247 | #[inline] 248 | fn write_usize(&mut self, _i: usize) { 249 | unreachable!("Specialized hasher was called with a different type of object") 250 | } 251 | } 252 | 253 | #[cfg(specialize)] 254 | pub(crate) 
struct AHasherFixed(pub AHasher); 255 | 256 | /// A specialized hasher for fixed size primitives larger than 64 bits. 257 | #[cfg(specialize)] 258 | impl Hasher for AHasherFixed { 259 | #[inline] 260 | fn finish(&self) -> u64 { 261 | self.0.short_finish() 262 | } 263 | 264 | #[inline] 265 | fn write(&mut self, bytes: &[u8]) { 266 | self.0.write(bytes) 267 | } 268 | 269 | #[inline] 270 | fn write_u8(&mut self, i: u8) { 271 | self.write_u64(i as u64); 272 | } 273 | 274 | #[inline] 275 | fn write_u16(&mut self, i: u16) { 276 | self.write_u64(i as u64); 277 | } 278 | 279 | #[inline] 280 | fn write_u32(&mut self, i: u32) { 281 | self.write_u64(i as u64); 282 | } 283 | 284 | #[inline] 285 | fn write_u64(&mut self, i: u64) { 286 | self.0.write_u64(i); 287 | } 288 | 289 | #[inline] 290 | fn write_u128(&mut self, i: u128) { 291 | self.0.write_u128(i); 292 | } 293 | 294 | #[inline] 295 | fn write_usize(&mut self, i: usize) { 296 | self.0.write_usize(i); 297 | } 298 | } 299 | 300 | #[cfg(specialize)] 301 | pub(crate) struct AHasherStr(pub AHasher); 302 | 303 | /// A specialized hasher for a single string 304 | /// Note that the other types don't panic because the hash impl for String tacks on an unneeded call. 
(As does vec) 305 | #[cfg(specialize)] 306 | impl Hasher for AHasherStr { 307 | #[inline] 308 | fn finish(&self) -> u64 { 309 | self.0.finish() 310 | } 311 | 312 | #[inline] 313 | fn write(&mut self, bytes: &[u8]) { 314 | if bytes.len() > 8 { 315 | self.0.write(bytes) 316 | } else { 317 | let value = read_small(bytes); 318 | self.0.buffer = folded_multiply(value[0] ^ self.0.buffer, value[1] ^ self.0.extra_keys[1]); 319 | self.0.pad = self.0.pad.wrapping_add(bytes.len() as u64); 320 | } 321 | } 322 | 323 | #[inline] 324 | fn write_u8(&mut self, _i: u8) {} 325 | 326 | #[inline] 327 | fn write_u16(&mut self, _i: u16) {} 328 | 329 | #[inline] 330 | fn write_u32(&mut self, _i: u32) {} 331 | 332 | #[inline] 333 | fn write_u64(&mut self, _i: u64) {} 334 | 335 | #[inline] 336 | fn write_u128(&mut self, _i: u128) {} 337 | 338 | #[inline] 339 | fn write_usize(&mut self, _i: usize) {} 340 | } 341 | 342 | #[cfg(test)] 343 | mod tests { 344 | use crate::fallback_hash::*; 345 | 346 | #[test] 347 | fn test_hash() { 348 | let mut hasher = AHasher::new_with_keys(0, 0); 349 | let value: u64 = 1 << 32; 350 | hasher.update(value); 351 | let result = hasher.buffer; 352 | let mut hasher = AHasher::new_with_keys(0, 0); 353 | let value2: u64 = 1; 354 | hasher.update(value2); 355 | let result2 = hasher.buffer; 356 | let result: [u8; 8] = result.convert(); 357 | let result2: [u8; 8] = result2.convert(); 358 | assert_ne!(hex::encode(result), hex::encode(result2)); 359 | } 360 | } 361 | -------------------------------------------------------------------------------- /src/hash_map.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Borrow; 2 | use std::collections::hash_map::{IntoKeys, IntoValues}; 3 | use std::collections::{hash_map, HashMap}; 4 | use std::fmt::{self, Debug}; 5 | use std::hash::{BuildHasher, Hash}; 6 | use std::iter::FromIterator; 7 | use std::ops::{Deref, DerefMut, Index}; 8 | use std::panic::UnwindSafe; 9 | 10 | 
#[cfg(feature = "serde")] 11 | use serde::{ 12 | de::{Deserialize, Deserializer}, 13 | ser::{Serialize, Serializer}, 14 | }; 15 | 16 | use crate::RandomState; 17 | 18 | /// A [`HashMap`](std::collections::HashMap) using [`RandomState`](crate::RandomState) to hash the items. 19 | /// (Requires the `std` feature to be enabled.) 20 | #[derive(Clone)] 21 | pub struct AHashMap(HashMap); 22 | 23 | impl From> for AHashMap { 24 | fn from(item: HashMap) -> Self { 25 | AHashMap(item) 26 | } 27 | } 28 | 29 | impl From<[(K, V); N]> for AHashMap 30 | where 31 | K: Eq + Hash, 32 | { 33 | /// # Examples 34 | /// 35 | /// ``` 36 | /// use ahash::AHashMap; 37 | /// 38 | /// let map1 = AHashMap::from([(1, 2), (3, 4)]); 39 | /// let map2: AHashMap<_, _> = [(1, 2), (3, 4)].into(); 40 | /// assert_eq!(map1, map2); 41 | /// ``` 42 | fn from(arr: [(K, V); N]) -> Self { 43 | Self::from_iter(arr) 44 | } 45 | } 46 | 47 | impl Into> for AHashMap { 48 | fn into(self) -> HashMap { 49 | self.0 50 | } 51 | } 52 | 53 | impl AHashMap { 54 | /// This creates a hashmap using [RandomState::new] which obtains its keys from [RandomSource]. 55 | /// See the documentation in [RandomSource] for notes about key strength. 56 | pub fn new() -> Self { 57 | AHashMap(HashMap::with_hasher(RandomState::new())) 58 | } 59 | 60 | /// This creates a hashmap with the specified capacity using [RandomState::new]. 61 | /// See the documentation in [RandomSource] for notes about key strength. 
62 | pub fn with_capacity(capacity: usize) -> Self { 63 | AHashMap(HashMap::with_capacity_and_hasher(capacity, RandomState::new())) 64 | } 65 | } 66 | 67 | impl AHashMap 68 | where 69 | S: BuildHasher, 70 | { 71 | pub fn with_hasher(hash_builder: S) -> Self { 72 | AHashMap(HashMap::with_hasher(hash_builder)) 73 | } 74 | 75 | pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> Self { 76 | AHashMap(HashMap::with_capacity_and_hasher(capacity, hash_builder)) 77 | } 78 | } 79 | 80 | impl AHashMap 81 | where 82 | K: Hash + Eq, 83 | S: BuildHasher, 84 | { 85 | /// Returns a reference to the value corresponding to the key. 86 | /// 87 | /// The key may be any borrowed form of the map's key type, but 88 | /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for 89 | /// the key type. 90 | /// 91 | /// # Examples 92 | /// 93 | /// ``` 94 | /// use std::collections::HashMap; 95 | /// 96 | /// let mut map = HashMap::new(); 97 | /// map.insert(1, "a"); 98 | /// assert_eq!(map.get(&1), Some(&"a")); 99 | /// assert_eq!(map.get(&2), None); 100 | /// ``` 101 | #[inline] 102 | pub fn get(&self, k: &Q) -> Option<&V> 103 | where 104 | K: Borrow, 105 | Q: Hash + Eq, 106 | { 107 | self.0.get(k) 108 | } 109 | 110 | /// Returns the key-value pair corresponding to the supplied key. 111 | /// 112 | /// The supplied key may be any borrowed form of the map's key type, but 113 | /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for 114 | /// the key type. 
115 | /// 116 | /// # Examples 117 | /// 118 | /// ``` 119 | /// use std::collections::HashMap; 120 | /// 121 | /// let mut map = HashMap::new(); 122 | /// map.insert(1, "a"); 123 | /// assert_eq!(map.get_key_value(&1), Some((&1, &"a"))); 124 | /// assert_eq!(map.get_key_value(&2), None); 125 | /// ``` 126 | #[inline] 127 | pub fn get_key_value(&self, k: &Q) -> Option<(&K, &V)> 128 | where 129 | K: Borrow, 130 | Q: Hash + Eq, 131 | { 132 | self.0.get_key_value(k) 133 | } 134 | 135 | /// Returns a mutable reference to the value corresponding to the key. 136 | /// 137 | /// The key may be any borrowed form of the map's key type, but 138 | /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for 139 | /// the key type. 140 | /// 141 | /// # Examples 142 | /// 143 | /// ``` 144 | /// use std::collections::HashMap; 145 | /// 146 | /// let mut map = HashMap::new(); 147 | /// map.insert(1, "a"); 148 | /// if let Some(x) = map.get_mut(&1) { 149 | /// *x = "b"; 150 | /// } 151 | /// assert_eq!(map[&1], "b"); 152 | /// ``` 153 | #[inline] 154 | pub fn get_mut(&mut self, k: &Q) -> Option<&mut V> 155 | where 156 | K: Borrow, 157 | Q: Hash + Eq, 158 | { 159 | self.0.get_mut(k) 160 | } 161 | 162 | /// Inserts a key-value pair into the map. 163 | /// 164 | /// If the map did not have this key present, [`None`] is returned. 165 | /// 166 | /// If the map did have this key present, the value is updated, and the old 167 | /// value is returned. The key is not updated, though; this matters for 168 | /// types that can be `==` without being identical. See the [module-level 169 | /// documentation] for more. 
170 | /// 171 | /// # Examples 172 | /// 173 | /// ``` 174 | /// use std::collections::HashMap; 175 | /// 176 | /// let mut map = HashMap::new(); 177 | /// assert_eq!(map.insert(37, "a"), None); 178 | /// assert_eq!(map.is_empty(), false); 179 | /// 180 | /// map.insert(37, "b"); 181 | /// assert_eq!(map.insert(37, "c"), Some("b")); 182 | /// assert_eq!(map[&37], "c"); 183 | /// ``` 184 | #[inline] 185 | pub fn insert(&mut self, k: K, v: V) -> Option { 186 | self.0.insert(k, v) 187 | } 188 | 189 | /// Creates a consuming iterator visiting all the keys in arbitrary order. 190 | /// The map cannot be used after calling this. 191 | /// The iterator element type is `K`. 192 | /// 193 | /// # Examples 194 | /// 195 | /// ``` 196 | /// use std::collections::HashMap; 197 | /// 198 | /// let map = HashMap::from([ 199 | /// ("a", 1), 200 | /// ("b", 2), 201 | /// ("c", 3), 202 | /// ]); 203 | /// 204 | /// let mut vec: Vec<&str> = map.into_keys().collect(); 205 | /// // The `IntoKeys` iterator produces keys in arbitrary order, so the 206 | /// // keys must be sorted to test them against a sorted array. 207 | /// vec.sort_unstable(); 208 | /// assert_eq!(vec, ["a", "b", "c"]); 209 | /// ``` 210 | /// 211 | /// # Performance 212 | /// 213 | /// In the current implementation, iterating over keys takes O(capacity) time 214 | /// instead of O(len) because it internally visits empty buckets too. 215 | #[inline] 216 | pub fn into_keys(self) -> IntoKeys { 217 | self.0.into_keys() 218 | } 219 | 220 | /// Creates a consuming iterator visiting all the values in arbitrary order. 221 | /// The map cannot be used after calling this. 222 | /// The iterator element type is `V`. 
223 | /// 224 | /// # Examples 225 | /// 226 | /// ``` 227 | /// use std::collections::HashMap; 228 | /// 229 | /// let map = HashMap::from([ 230 | /// ("a", 1), 231 | /// ("b", 2), 232 | /// ("c", 3), 233 | /// ]); 234 | /// 235 | /// let mut vec: Vec = map.into_values().collect(); 236 | /// // The `IntoValues` iterator produces values in arbitrary order, so 237 | /// // the values must be sorted to test them against a sorted array. 238 | /// vec.sort_unstable(); 239 | /// assert_eq!(vec, [1, 2, 3]); 240 | /// ``` 241 | /// 242 | /// # Performance 243 | /// 244 | /// In the current implementation, iterating over values takes O(capacity) time 245 | /// instead of O(len) because it internally visits empty buckets too. 246 | #[inline] 247 | pub fn into_values(self) -> IntoValues { 248 | self.0.into_values() 249 | } 250 | 251 | /// Removes a key from the map, returning the value at the key if the key 252 | /// was previously in the map. 253 | /// 254 | /// The key may be any borrowed form of the map's key type, but 255 | /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for 256 | /// the key type. 
257 | /// 258 | /// # Examples 259 | /// 260 | /// ``` 261 | /// use std::collections::HashMap; 262 | /// 263 | /// let mut map = HashMap::new(); 264 | /// map.insert(1, "a"); 265 | /// assert_eq!(map.remove(&1), Some("a")); 266 | /// assert_eq!(map.remove(&1), None); 267 | /// ``` 268 | #[inline] 269 | pub fn remove(&mut self, k: &Q) -> Option 270 | where 271 | K: Borrow, 272 | Q: Hash + Eq, 273 | { 274 | self.0.remove(k) 275 | } 276 | } 277 | 278 | impl Deref for AHashMap { 279 | type Target = HashMap; 280 | fn deref(&self) -> &Self::Target { 281 | &self.0 282 | } 283 | } 284 | 285 | impl DerefMut for AHashMap { 286 | fn deref_mut(&mut self) -> &mut Self::Target { 287 | &mut self.0 288 | } 289 | } 290 | 291 | impl UnwindSafe for AHashMap 292 | where 293 | K: UnwindSafe, 294 | V: UnwindSafe, 295 | { 296 | } 297 | 298 | impl PartialEq for AHashMap 299 | where 300 | K: Eq + Hash, 301 | V: PartialEq, 302 | S: BuildHasher, 303 | { 304 | fn eq(&self, other: &AHashMap) -> bool { 305 | self.0.eq(&other.0) 306 | } 307 | } 308 | 309 | impl Eq for AHashMap 310 | where 311 | K: Eq + Hash, 312 | V: Eq, 313 | S: BuildHasher, 314 | { 315 | } 316 | 317 | impl Index<&Q> for AHashMap 318 | where 319 | K: Eq + Hash + Borrow, 320 | Q: Eq + Hash, 321 | S: BuildHasher, 322 | { 323 | type Output = V; 324 | 325 | /// Returns a reference to the value corresponding to the supplied key. 326 | /// 327 | /// # Panics 328 | /// 329 | /// Panics if the key is not present in the `HashMap`. 330 | #[inline] 331 | fn index(&self, key: &Q) -> &V { 332 | self.0.index(key) 333 | } 334 | } 335 | 336 | impl Debug for AHashMap 337 | where 338 | K: Debug, 339 | V: Debug, 340 | S: BuildHasher, 341 | { 342 | fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { 343 | self.0.fmt(fmt) 344 | } 345 | } 346 | 347 | impl FromIterator<(K, V)> for AHashMap 348 | where 349 | K: Eq + Hash, 350 | { 351 | /// This creates a hashmap from the provided iterator using [RandomState::new]. 
352 | /// See the documentation in [RandomSource] for notes about key strength. 353 | fn from_iter>(iter: T) -> Self { 354 | let mut inner = HashMap::with_hasher(RandomState::new()); 355 | inner.extend(iter); 356 | AHashMap(inner) 357 | } 358 | } 359 | 360 | impl<'a, K, V, S> IntoIterator for &'a AHashMap { 361 | type Item = (&'a K, &'a V); 362 | type IntoIter = hash_map::Iter<'a, K, V>; 363 | fn into_iter(self) -> Self::IntoIter { 364 | (&self.0).iter() 365 | } 366 | } 367 | 368 | impl<'a, K, V, S> IntoIterator for &'a mut AHashMap { 369 | type Item = (&'a K, &'a mut V); 370 | type IntoIter = hash_map::IterMut<'a, K, V>; 371 | fn into_iter(self) -> Self::IntoIter { 372 | (&mut self.0).iter_mut() 373 | } 374 | } 375 | 376 | impl IntoIterator for AHashMap { 377 | type Item = (K, V); 378 | type IntoIter = hash_map::IntoIter; 379 | fn into_iter(self) -> Self::IntoIter { 380 | self.0.into_iter() 381 | } 382 | } 383 | 384 | impl Extend<(K, V)> for AHashMap 385 | where 386 | K: Eq + Hash, 387 | S: BuildHasher, 388 | { 389 | #[inline] 390 | fn extend>(&mut self, iter: T) { 391 | self.0.extend(iter) 392 | } 393 | } 394 | 395 | impl<'a, K, V, S> Extend<(&'a K, &'a V)> for AHashMap 396 | where 397 | K: Eq + Hash + Copy + 'a, 398 | V: Copy + 'a, 399 | S: BuildHasher, 400 | { 401 | #[inline] 402 | fn extend>(&mut self, iter: T) { 403 | self.0.extend(iter) 404 | } 405 | } 406 | 407 | /// NOTE: For safety this trait impl is only available if either of the flags `runtime-rng` (on by default) or 408 | /// `compile-time-rng` are enabled. This is to prevent weakly keyed maps from being accidentally created. Instead one of 409 | /// constructors for [RandomState] must be used. 
410 | #[cfg(any(feature = "compile-time-rng", feature = "runtime-rng", feature = "no-rng"))] 411 | impl Default for AHashMap { 412 | #[inline] 413 | fn default() -> AHashMap { 414 | AHashMap(HashMap::default()) 415 | } 416 | } 417 | 418 | #[cfg(feature = "serde")] 419 | impl Serialize for AHashMap 420 | where 421 | K: Serialize + Eq + Hash, 422 | V: Serialize, 423 | { 424 | fn serialize(&self, serializer: S) -> Result { 425 | self.deref().serialize(serializer) 426 | } 427 | } 428 | 429 | #[cfg(feature = "serde")] 430 | impl<'de, K, V> Deserialize<'de> for AHashMap 431 | where 432 | K: Deserialize<'de> + Eq + Hash, 433 | V: Deserialize<'de>, 434 | { 435 | fn deserialize>(deserializer: D) -> Result { 436 | let hash_map = HashMap::deserialize(deserializer); 437 | hash_map.map(|hash_map| Self(hash_map)) 438 | } 439 | 440 | fn deserialize_in_place>(deserializer: D, place: &mut Self) -> Result<(), D::Error> { 441 | use serde::de::{MapAccess, Visitor}; 442 | 443 | struct MapInPlaceVisitor<'a, K: 'a, V: 'a>(&'a mut AHashMap); 444 | 445 | impl<'a, 'de, K, V> Visitor<'de> for MapInPlaceVisitor<'a, K, V> 446 | where 447 | K: Deserialize<'de> + Eq + Hash, 448 | V: Deserialize<'de>, 449 | { 450 | type Value = (); 451 | 452 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 453 | formatter.write_str("a map") 454 | } 455 | 456 | fn visit_map(self, mut map: A) -> Result 457 | where 458 | A: MapAccess<'de>, 459 | { 460 | self.0.clear(); 461 | self.0.reserve(map.size_hint().unwrap_or(0).min(4096)); 462 | 463 | while let Some((key, value)) = map.next_entry()? 
{ 464 | self.0.insert(key, value); 465 | } 466 | 467 | Ok(()) 468 | } 469 | } 470 | 471 | deserializer.deserialize_map(MapInPlaceVisitor(place)) 472 | } 473 | } 474 | 475 | #[cfg(test)] 476 | mod test { 477 | use super::*; 478 | #[test] 479 | fn test_borrow() { 480 | let mut map: AHashMap = AHashMap::new(); 481 | map.insert("foo".to_string(), "Bar".to_string()); 482 | map.insert("Bar".to_string(), map.get("foo").unwrap().to_owned()); 483 | } 484 | 485 | #[cfg(feature = "serde")] 486 | #[test] 487 | fn test_serde() { 488 | let mut map = AHashMap::new(); 489 | map.insert("for".to_string(), 0); 490 | map.insert("bar".to_string(), 1); 491 | let mut serialization = serde_json::to_string(&map).unwrap(); 492 | let mut deserialization: AHashMap = serde_json::from_str(&serialization).unwrap(); 493 | assert_eq!(deserialization, map); 494 | 495 | map.insert("baz".to_string(), 2); 496 | serialization = serde_json::to_string(&map).unwrap(); 497 | let mut deserializer = serde_json::Deserializer::from_str(&serialization); 498 | AHashMap::deserialize_in_place(&mut deserializer, &mut deserialization).unwrap(); 499 | assert_eq!(deserialization, map); 500 | } 501 | } 502 | -------------------------------------------------------------------------------- /src/hash_quality_test.rs: -------------------------------------------------------------------------------- 1 | use core::hash::{Hash, Hasher}; 2 | use std::collections::HashMap; 3 | 4 | fn assert_sufficiently_different(a: u64, b: u64, tolerance: i32) { 5 | let (same_byte_count, same_nibble_count) = count_same_bytes_and_nibbles(a, b); 6 | assert!(same_byte_count <= tolerance, "{:x} vs {:x}: {:}", a, b, same_byte_count); 7 | assert!( 8 | same_nibble_count <= tolerance * 3, 9 | "{:x} vs {:x}: {:}", 10 | a, 11 | b, 12 | same_nibble_count 13 | ); 14 | let flipped_bits = (a ^ b).count_ones(); 15 | assert!( 16 | flipped_bits > 12 && flipped_bits < 52, 17 | "{:x} and {:x}: {:}", 18 | a, 19 | b, 20 | flipped_bits 21 | ); 22 | for rotate in 
0..64 { 23 | let flipped_bits2 = (a ^ (b.rotate_left(rotate))).count_ones(); 24 | assert!( 25 | flipped_bits2 > 10 && flipped_bits2 < 54, 26 | "{:x} and {:x}: {:}", 27 | a, 28 | b.rotate_left(rotate), 29 | flipped_bits2 30 | ); 31 | } 32 | } 33 | 34 | fn count_same_bytes_and_nibbles(a: u64, b: u64) -> (i32, i32) { 35 | let mut same_byte_count = 0; 36 | let mut same_nibble_count = 0; 37 | for byte in 0..8 { 38 | let ba = (a >> (8 * byte)) as u8; 39 | let bb = (b >> (8 * byte)) as u8; 40 | if ba == bb { 41 | same_byte_count += 1; 42 | } 43 | if ba & 0xF0u8 == bb & 0xF0u8 { 44 | same_nibble_count += 1; 45 | } 46 | if ba & 0x0Fu8 == bb & 0x0Fu8 { 47 | same_nibble_count += 1; 48 | } 49 | } 50 | (same_byte_count, same_nibble_count) 51 | } 52 | 53 | fn gen_combinations(options: &[u32; 11], depth: u32, so_far: Vec, combinations: &mut Vec>) { 54 | if depth == 0 { 55 | return; 56 | } 57 | for option in options { 58 | let mut next = so_far.clone(); 59 | next.push(*option); 60 | combinations.push(next.clone()); 61 | gen_combinations(options, depth - 1, next, combinations); 62 | } 63 | } 64 | 65 | fn test_no_full_collisions(gen_hash: impl Fn() -> T) { 66 | let options: [u32; 11] = [ 67 | 0x00000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000, 0xF0000000, 1, 2, 4, 8, 15, 68 | ]; 69 | let mut combinations = Vec::new(); 70 | gen_combinations(&options, 7, Vec::new(), &mut combinations); 71 | let mut map: HashMap> = HashMap::new(); 72 | for combination in combinations { 73 | use zerocopy::IntoBytes; 74 | let array = combination.as_bytes().to_vec(); 75 | let mut hasher = gen_hash(); 76 | hasher.write(&array); 77 | let hash = hasher.finish(); 78 | if let Some(value) = map.get(&hash) { 79 | assert_eq!( 80 | value, &array, 81 | "Found a collision between {:x?} and {:x?}. Hash: {:x?}", 82 | value, &array, &hash 83 | ); 84 | } else { 85 | map.insert(hash, array); 86 | } 87 | } 88 | assert_eq!(21435887, map.len()); //11^7 + 11^6 ... 
89 | } 90 | 91 | fn test_keys_change_output(constructor: impl Fn(u128, u128) -> T) { 92 | let mut a = constructor(1, 1); 93 | let mut b = constructor(1, 2); 94 | let mut c = constructor(2, 1); 95 | let mut d = constructor(2, 2); 96 | "test".hash(&mut a); 97 | "test".hash(&mut b); 98 | "test".hash(&mut c); 99 | "test".hash(&mut d); 100 | assert_sufficiently_different(a.finish(), b.finish(), 1); 101 | assert_sufficiently_different(a.finish(), c.finish(), 1); 102 | assert_sufficiently_different(a.finish(), d.finish(), 1); 103 | assert_sufficiently_different(b.finish(), c.finish(), 1); 104 | assert_sufficiently_different(b.finish(), d.finish(), 1); 105 | assert_sufficiently_different(c.finish(), d.finish(), 1); 106 | } 107 | 108 | fn test_input_affect_every_byte(constructor: impl Fn(u128, u128) -> T) { 109 | let base = hash_with(&0, constructor(0, 0)); 110 | for shift in 0..16 { 111 | let mut alternatives = vec![]; 112 | for v in 0..256 { 113 | let input = (v as u128) << (shift * 8); 114 | let hasher = constructor(0, 0); 115 | alternatives.push(hash_with(&input, hasher)); 116 | } 117 | assert_each_byte_differs(shift, base, alternatives); 118 | } 119 | } 120 | 121 | ///Ensures that for every bit in the output there is some value for each byte in the key that flips it. 
122 | fn test_keys_affect_every_byte(item: H, constructor: impl Fn(u128, u128) -> T) { 123 | let base = hash_with(&item, constructor(0, 0)); 124 | for shift in 0..16 { 125 | let mut alternatives1 = vec![]; 126 | let mut alternatives2 = vec![]; 127 | for v in 0..256 { 128 | let input = (v as u128) << (shift * 8); 129 | let hasher1 = constructor(input, 0); 130 | let hasher2 = constructor(0, input); 131 | let h1 = hash_with(&item, hasher1); 132 | let h2 = hash_with(&item, hasher2); 133 | alternatives1.push(h1); 134 | alternatives2.push(h2); 135 | } 136 | assert_each_byte_differs(shift, base, alternatives1); 137 | assert_each_byte_differs(shift, base, alternatives2); 138 | } 139 | } 140 | 141 | fn assert_each_byte_differs(num: u64, base: u64, alternatives: Vec) { 142 | let mut changed_bits = 0_u64; 143 | for alternative in alternatives { 144 | changed_bits |= base ^ alternative 145 | } 146 | assert_eq!( 147 | core::u64::MAX, 148 | changed_bits, 149 | "Bits changed: {:x} on num: {:?}. base {:x}", 150 | changed_bits, 151 | num, 152 | base 153 | ); 154 | } 155 | 156 | fn test_finish_is_consistent(constructor: impl Fn(u128, u128) -> T) { 157 | let mut hasher = constructor(1, 2); 158 | "Foo".hash(&mut hasher); 159 | let a = hasher.finish(); 160 | let b = hasher.finish(); 161 | assert_eq!(a, b); 162 | } 163 | 164 | fn test_single_key_bit_flip(constructor: impl Fn(u128, u128) -> T) { 165 | for bit in 0..128 { 166 | let mut a = constructor(0, 0); 167 | let mut b = constructor(0, 1 << bit); 168 | let mut c = constructor(1 << bit, 0); 169 | "1234".hash(&mut a); 170 | "1234".hash(&mut b); 171 | "1234".hash(&mut c); 172 | assert_sufficiently_different(a.finish(), b.finish(), 2); 173 | assert_sufficiently_different(a.finish(), c.finish(), 2); 174 | assert_sufficiently_different(b.finish(), c.finish(), 2); 175 | let mut a = constructor(0, 0); 176 | let mut b = constructor(0, 1 << bit); 177 | let mut c = constructor(1 << bit, 0); 178 | "12345678".hash(&mut a); 179 | 
"12345678".hash(&mut b); 180 | "12345678".hash(&mut c); 181 | assert_sufficiently_different(a.finish(), b.finish(), 2); 182 | assert_sufficiently_different(a.finish(), c.finish(), 2); 183 | assert_sufficiently_different(b.finish(), c.finish(), 2); 184 | let mut a = constructor(0, 0); 185 | let mut b = constructor(0, 1 << bit); 186 | let mut c = constructor(1 << bit, 0); 187 | "1234567812345678".hash(&mut a); 188 | "1234567812345678".hash(&mut b); 189 | "1234567812345678".hash(&mut c); 190 | assert_sufficiently_different(a.finish(), b.finish(), 2); 191 | assert_sufficiently_different(a.finish(), c.finish(), 2); 192 | assert_sufficiently_different(b.finish(), c.finish(), 2); 193 | } 194 | } 195 | 196 | fn test_all_bytes_matter(hasher: impl Fn() -> T) { 197 | let mut item = vec![0; 256]; 198 | let base_hash = hash(&item, &hasher); 199 | for pos in 0..256 { 200 | item[pos] = 255; 201 | let hash = hash(&item, &hasher); 202 | assert_ne!(base_hash, hash, "Position {} did not affect output", pos); 203 | item[pos] = 0; 204 | } 205 | } 206 | 207 | fn test_no_pair_collisions(hasher: impl Fn() -> T) { 208 | let base = [0_u64, 0_u64]; 209 | let base_hash = hash(&base, &hasher); 210 | for bitpos1 in 0..64 { 211 | let a = 1_u64 << bitpos1; 212 | for bitpos2 in 0..bitpos1 { 213 | let b = 1_u64 << bitpos2; 214 | let aa = hash(&[a, a], &hasher); 215 | let ab = hash(&[a, b], &hasher); 216 | let ba = hash(&[b, a], &hasher); 217 | let bb = hash(&[b, b], &hasher); 218 | assert_sufficiently_different(base_hash, aa, 3); 219 | assert_sufficiently_different(base_hash, ab, 3); 220 | assert_sufficiently_different(base_hash, ba, 3); 221 | assert_sufficiently_different(base_hash, bb, 3); 222 | assert_sufficiently_different(aa, ab, 3); 223 | assert_sufficiently_different(ab, ba, 3); 224 | assert_sufficiently_different(ba, bb, 3); 225 | assert_sufficiently_different(aa, ba, 3); 226 | assert_sufficiently_different(ab, bb, 3); 227 | assert_sufficiently_different(aa, bb, 3); 228 | } 229 | } 230 | } 
231 | 232 | fn hash(b: &H, hash_builder: &dyn Fn() -> T) -> u64 { 233 | let mut hasher = hash_builder(); 234 | b.hash(&mut hasher); 235 | hasher.finish() 236 | } 237 | 238 | fn hash_with(b: &H, mut hasher: T) -> u64 { 239 | b.hash(&mut hasher); 240 | hasher.finish() 241 | } 242 | 243 | fn test_single_bit_flip(hasher: impl Fn() -> T) { 244 | let size = 32; 245 | let compare_value = hash(&0u32, &hasher); 246 | for pos in 0..size { 247 | let test_value = hash(&(1u32 << pos), &hasher); 248 | assert_sufficiently_different(compare_value, test_value, 2); 249 | } 250 | let size = 64; 251 | let compare_value = hash(&0u64, &hasher); 252 | for pos in 0..size { 253 | let test_value = hash(&(1u64 << pos), &hasher); 254 | assert_sufficiently_different(compare_value, test_value, 2); 255 | } 256 | let size = 128; 257 | let compare_value = hash(&0u128, &hasher); 258 | for pos in 0..size { 259 | let test_value = hash(&(1u128 << pos), &hasher); 260 | dbg!(compare_value, test_value); 261 | assert_sufficiently_different(compare_value, test_value, 2); 262 | } 263 | } 264 | 265 | fn test_padding_doesnot_collide(hasher: impl Fn() -> T) { 266 | for c in 0..128u8 { 267 | for string in ["", "\0", "\x01", "1234", "12345678", "1234567812345678"].iter() { 268 | let mut short = hasher(); 269 | string.hash(&mut short); 270 | let value = short.finish(); 271 | let mut padded = string.to_string(); 272 | for num in 1..=128 { 273 | let mut long = hasher(); 274 | padded.push(c as char); 275 | padded.hash(&mut long); 276 | let (same_bytes, same_nibbles) = count_same_bytes_and_nibbles(value, long.finish()); 277 | assert!( 278 | same_bytes <= 3, 279 | "{} bytes of {} -> {:x} vs {:x}", 280 | num, 281 | c, 282 | value, 283 | long.finish() 284 | ); 285 | assert!( 286 | same_nibbles <= 8, 287 | "{} bytes of {} -> {:x} vs {:x}", 288 | num, 289 | c, 290 | value, 291 | long.finish() 292 | ); 293 | let flipped_bits = (value ^ long.finish()).count_ones(); 294 | assert!(flipped_bits > 10); 295 | } 296 | if 
string.len() > 0 { 297 | let mut padded = string[1..].to_string(); 298 | padded.push(c as char); 299 | for num in 2..=128 { 300 | let mut long = hasher(); 301 | padded.push(c as char); 302 | padded.hash(&mut long); 303 | let (same_bytes, same_nibbles) = count_same_bytes_and_nibbles(value, long.finish()); 304 | assert!( 305 | same_bytes <= 3, 306 | "string {:?} + {} bytes of {} -> {:x} vs {:x}", 307 | string, 308 | num, 309 | c, 310 | value, 311 | long.finish() 312 | ); 313 | assert!( 314 | same_nibbles <= 8, 315 | "string {:?} + {} bytes of {} -> {:x} vs {:x}", 316 | string, 317 | num, 318 | c, 319 | value, 320 | long.finish() 321 | ); 322 | let flipped_bits = (value ^ long.finish()).count_ones(); 323 | assert!(flipped_bits > 10); 324 | } 325 | } 326 | } 327 | } 328 | } 329 | 330 | fn test_length_extension(hasher: impl Fn(u128, u128) -> T) { 331 | for key in 0..256 { 332 | let h1 = hasher(key, key); 333 | let v1 = hash_with(&[0_u8, 0, 0, 0, 0, 0, 0, 0], h1); 334 | let h2 = hasher(key, key); 335 | let v2 = hash_with(&[1_u8, 0, 0, 0, 0, 0, 0, 0, 0], h2); 336 | assert_ne!(v1, v2); 337 | } 338 | } 339 | 340 | fn test_sparse(hasher: impl Fn() -> T) { 341 | use smallvec::SmallVec; 342 | 343 | let mut buf = [0u8; 256]; 344 | let mut hashes = HashMap::new(); 345 | for idx_1 in 0..255_u8 { 346 | for idx_2 in idx_1 + 1..=255_u8 { 347 | for value_1 in [1, 2, 4, 8, 16, 32, 64, 128] { 348 | for value_2 in [ 349 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 15, 16, 17, 18, 20, 24, 31, 32, 33, 48, 64, 96, 127, 128, 129, 350 | 192, 254, 255, 351 | ] { 352 | buf[idx_1 as usize] = value_1; 353 | buf[idx_2 as usize] = value_2; 354 | let hash_value = hash_with(&buf, &mut hasher()); 355 | let keys = hashes.entry(hash_value).or_insert(SmallVec::<[[u8; 4]; 1]>::new()); 356 | keys.push([idx_1, value_1, idx_2, value_2]); 357 | buf[idx_1 as usize] = 0; 358 | buf[idx_2 as usize] = 0; 359 | } 360 | } 361 | } 362 | } 363 | hashes.retain(|_key, value| value.len() != 1); 364 | assert_eq!(0, hashes.len(), 
"Collision with: {:?}", hashes); 365 | } 366 | 367 | #[cfg(test)] 368 | mod fallback_tests { 369 | use crate::fallback_hash::*; 370 | use crate::hash_quality_test::*; 371 | 372 | #[test] 373 | fn fallback_single_bit_flip() { 374 | test_single_bit_flip(|| AHasher::new_with_keys(0, 0)) 375 | } 376 | 377 | #[test] 378 | fn fallback_single_key_bit_flip() { 379 | test_single_key_bit_flip(AHasher::new_with_keys) 380 | } 381 | 382 | #[test] 383 | fn fallback_all_bytes_matter() { 384 | test_all_bytes_matter(|| AHasher::new_with_keys(0, 0)); 385 | } 386 | 387 | #[test] 388 | fn fallback_test_no_pair_collisions() { 389 | test_no_pair_collisions(|| AHasher::new_with_keys(0, 0)); 390 | } 391 | 392 | #[test] 393 | fn fallback_test_no_full_collisions() { 394 | test_no_full_collisions(|| AHasher::new_with_keys(0, 0)); 395 | } 396 | 397 | #[test] 398 | fn fallback_keys_change_output() { 399 | test_keys_change_output(AHasher::new_with_keys); 400 | } 401 | 402 | #[test] 403 | fn fallback_input_affect_every_byte() { 404 | test_input_affect_every_byte(AHasher::new_with_keys); 405 | } 406 | 407 | #[test] 408 | fn fallback_keys_affect_every_byte() { 409 | //For fallback second key is not used in every hash. 
410 | #[cfg(all(not(specialize), folded_multiply))] 411 | test_keys_affect_every_byte(0, |a, b| AHasher::new_with_keys(a ^ b, a)); 412 | test_keys_affect_every_byte("", |a, b| AHasher::new_with_keys(a ^ b, a)); 413 | test_keys_affect_every_byte((0, 0), |a, b| AHasher::new_with_keys(a ^ b, a)); 414 | } 415 | 416 | #[test] 417 | fn fallback_finish_is_consistant() { 418 | test_finish_is_consistent(AHasher::test_with_keys) 419 | } 420 | 421 | #[test] 422 | fn fallback_padding_doesnot_collide() { 423 | test_padding_doesnot_collide(|| AHasher::new_with_keys(0, 0)); 424 | test_padding_doesnot_collide(|| AHasher::new_with_keys(0, 2)); 425 | test_padding_doesnot_collide(|| AHasher::new_with_keys(2, 0)); 426 | test_padding_doesnot_collide(|| AHasher::new_with_keys(2, 2)); 427 | } 428 | 429 | #[test] 430 | fn fallback_length_extension() { 431 | test_length_extension(|a, b| AHasher::new_with_keys(a, b)); 432 | } 433 | 434 | #[test] 435 | fn test_no_sparse_collisions() { 436 | test_sparse(|| AHasher::new_with_keys(0, 0)); 437 | test_sparse(|| AHasher::new_with_keys(1, 2)); 438 | } 439 | } 440 | 441 | ///Basic sanity tests of the cypto properties of aHash. 442 | #[cfg(any( 443 | all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), 444 | all(target_arch = "aarch64", target_feature = "aes", not(miri)), 445 | all(feature = "nightly-arm-aes", target_arch = "arm", target_feature = "aes", not(miri)), 446 | ))] 447 | #[cfg(test)] 448 | mod aes_tests { 449 | use crate::aes_hash::*; 450 | use crate::hash_quality_test::*; 451 | use std::hash::{Hash, Hasher}; 452 | 453 | //This encrypts to 0. 454 | const BAD_KEY2: u128 = 0x6363_6363_6363_6363_6363_6363_6363_6363; 455 | //This decrypts to 0. 
456 | const BAD_KEY: u128 = 0x5252_5252_5252_5252_5252_5252_5252_5252; 457 | 458 | #[test] 459 | fn test_single_bit_in_byte() { 460 | let mut hasher1 = AHasher::test_with_keys(0, 0); 461 | 8_u32.hash(&mut hasher1); 462 | let mut hasher2 = AHasher::test_with_keys(0, 0); 463 | 0_u32.hash(&mut hasher2); 464 | assert_sufficiently_different(hasher1.finish(), hasher2.finish(), 1); 465 | } 466 | 467 | #[test] 468 | fn aes_single_bit_flip() { 469 | test_single_bit_flip(|| AHasher::test_with_keys(BAD_KEY, BAD_KEY)); 470 | test_single_bit_flip(|| AHasher::test_with_keys(BAD_KEY2, BAD_KEY2)); 471 | } 472 | 473 | #[test] 474 | fn aes_single_key_bit_flip() { 475 | test_single_key_bit_flip(AHasher::test_with_keys) 476 | } 477 | 478 | #[test] 479 | fn aes_all_bytes_matter() { 480 | test_all_bytes_matter(|| AHasher::test_with_keys(BAD_KEY, BAD_KEY)); 481 | test_all_bytes_matter(|| AHasher::test_with_keys(BAD_KEY2, BAD_KEY2)); 482 | } 483 | 484 | #[test] 485 | fn aes_test_no_pair_collisions() { 486 | test_no_pair_collisions(|| AHasher::test_with_keys(BAD_KEY, BAD_KEY)); 487 | test_no_pair_collisions(|| AHasher::test_with_keys(BAD_KEY2, BAD_KEY2)); 488 | } 489 | 490 | #[test] 491 | fn ase_test_no_full_collisions() { 492 | test_no_full_collisions(|| AHasher::test_with_keys(12345, 67890)); 493 | } 494 | 495 | #[test] 496 | fn aes_keys_change_output() { 497 | test_keys_change_output(AHasher::test_with_keys); 498 | } 499 | 500 | #[test] 501 | fn aes_input_affect_every_byte() { 502 | test_input_affect_every_byte(AHasher::test_with_keys); 503 | } 504 | 505 | #[test] 506 | fn aes_keys_affect_every_byte() { 507 | #[cfg(not(specialize))] 508 | test_keys_affect_every_byte(0, AHasher::test_with_keys); 509 | test_keys_affect_every_byte("", AHasher::test_with_keys); 510 | test_keys_affect_every_byte((0, 0), AHasher::test_with_keys); 511 | } 512 | 513 | #[test] 514 | fn aes_finish_is_consistant() { 515 | test_finish_is_consistent(AHasher::test_with_keys) 516 | } 517 | 518 | #[test] 519 | fn 
aes_padding_doesnot_collide() { 520 | test_padding_doesnot_collide(|| AHasher::test_with_keys(BAD_KEY, BAD_KEY)); 521 | test_padding_doesnot_collide(|| AHasher::test_with_keys(BAD_KEY2, BAD_KEY2)); 522 | } 523 | 524 | #[test] 525 | fn aes_length_extension() { 526 | test_length_extension(|a, b| AHasher::test_with_keys(a, b)); 527 | } 528 | 529 | #[test] 530 | fn aes_no_sparse_collisions() { 531 | test_sparse(|| AHasher::test_with_keys(0, 0)); 532 | test_sparse(|| AHasher::test_with_keys(1, 2)); 533 | } 534 | } 535 | -------------------------------------------------------------------------------- /src/hash_set.rs: -------------------------------------------------------------------------------- 1 | use crate::RandomState; 2 | use std::collections::{hash_set, HashSet}; 3 | use std::fmt::{self, Debug}; 4 | use std::hash::{BuildHasher, Hash}; 5 | use std::iter::FromIterator; 6 | use std::ops::{BitAnd, BitOr, BitXor, Deref, DerefMut, Sub}; 7 | 8 | #[cfg(feature = "serde")] 9 | use serde::{ 10 | de::{Deserialize, Deserializer}, 11 | ser::{Serialize, Serializer}, 12 | }; 13 | 14 | /// A [`HashSet`](std::collections::HashSet) using [`RandomState`](crate::RandomState) to hash the items. 15 | /// (Requires the `std` feature to be enabled.) 16 | #[derive(Clone)] 17 | pub struct AHashSet(HashSet); 18 | 19 | impl From> for AHashSet { 20 | fn from(item: HashSet) -> Self { 21 | AHashSet(item) 22 | } 23 | } 24 | 25 | impl From<[T; N]> for AHashSet 26 | where 27 | T: Eq + Hash, 28 | { 29 | /// # Examples 30 | /// 31 | /// ``` 32 | /// use ahash::AHashSet; 33 | /// 34 | /// let set1 = AHashSet::from([1, 2, 3, 4]); 35 | /// let set2: AHashSet<_> = [1, 2, 3, 4].into(); 36 | /// assert_eq!(set1, set2); 37 | /// ``` 38 | fn from(arr: [T; N]) -> Self { 39 | Self::from_iter(arr) 40 | } 41 | } 42 | 43 | impl Into> for AHashSet { 44 | fn into(self) -> HashSet { 45 | self.0 46 | } 47 | } 48 | 49 | impl AHashSet { 50 | /// This creates a hashset using [RandomState::new]. 
51 | /// See the documentation in [RandomSource] for notes about key strength. 52 | pub fn new() -> Self { 53 | AHashSet(HashSet::with_hasher(RandomState::new())) 54 | } 55 | 56 | /// This craetes a hashset with the specified capacity using [RandomState::new]. 57 | /// See the documentation in [RandomSource] for notes about key strength. 58 | pub fn with_capacity(capacity: usize) -> Self { 59 | AHashSet(HashSet::with_capacity_and_hasher(capacity, RandomState::new())) 60 | } 61 | } 62 | 63 | impl AHashSet 64 | where 65 | S: BuildHasher, 66 | { 67 | pub fn with_hasher(hash_builder: S) -> Self { 68 | AHashSet(HashSet::with_hasher(hash_builder)) 69 | } 70 | 71 | pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> Self { 72 | AHashSet(HashSet::with_capacity_and_hasher(capacity, hash_builder)) 73 | } 74 | } 75 | 76 | impl Deref for AHashSet { 77 | type Target = HashSet; 78 | fn deref(&self) -> &Self::Target { 79 | &self.0 80 | } 81 | } 82 | 83 | impl DerefMut for AHashSet { 84 | fn deref_mut(&mut self) -> &mut Self::Target { 85 | &mut self.0 86 | } 87 | } 88 | 89 | impl PartialEq for AHashSet 90 | where 91 | T: Eq + Hash, 92 | S: BuildHasher, 93 | { 94 | fn eq(&self, other: &AHashSet) -> bool { 95 | self.0.eq(&other.0) 96 | } 97 | } 98 | 99 | impl Eq for AHashSet 100 | where 101 | T: Eq + Hash, 102 | S: BuildHasher, 103 | { 104 | } 105 | 106 | impl BitOr<&AHashSet> for &AHashSet 107 | where 108 | T: Eq + Hash + Clone, 109 | S: BuildHasher + Default, 110 | { 111 | type Output = AHashSet; 112 | 113 | /// Returns the union of `self` and `rhs` as a new `AHashSet`. 
114 | /// 115 | /// # Examples 116 | /// 117 | /// ``` 118 | /// use ahash::AHashSet; 119 | /// 120 | /// let a: AHashSet<_> = vec![1, 2, 3].into_iter().collect(); 121 | /// let b: AHashSet<_> = vec![3, 4, 5].into_iter().collect(); 122 | /// 123 | /// let set = &a | &b; 124 | /// 125 | /// let mut i = 0; 126 | /// let expected = [1, 2, 3, 4, 5]; 127 | /// for x in &set { 128 | /// assert!(expected.contains(x)); 129 | /// i += 1; 130 | /// } 131 | /// assert_eq!(i, expected.len()); 132 | /// ``` 133 | fn bitor(self, rhs: &AHashSet) -> AHashSet { 134 | AHashSet(self.0.bitor(&rhs.0)) 135 | } 136 | } 137 | 138 | impl BitAnd<&AHashSet> for &AHashSet 139 | where 140 | T: Eq + Hash + Clone, 141 | S: BuildHasher + Default, 142 | { 143 | type Output = AHashSet; 144 | 145 | /// Returns the intersection of `self` and `rhs` as a new `AHashSet`. 146 | /// 147 | /// # Examples 148 | /// 149 | /// ``` 150 | /// use ahash::AHashSet; 151 | /// 152 | /// let a: AHashSet<_> = vec![1, 2, 3].into_iter().collect(); 153 | /// let b: AHashSet<_> = vec![2, 3, 4].into_iter().collect(); 154 | /// 155 | /// let set = &a & &b; 156 | /// 157 | /// let mut i = 0; 158 | /// let expected = [2, 3]; 159 | /// for x in &set { 160 | /// assert!(expected.contains(x)); 161 | /// i += 1; 162 | /// } 163 | /// assert_eq!(i, expected.len()); 164 | /// ``` 165 | fn bitand(self, rhs: &AHashSet) -> AHashSet { 166 | AHashSet(self.0.bitand(&rhs.0)) 167 | } 168 | } 169 | 170 | impl BitXor<&AHashSet> for &AHashSet 171 | where 172 | T: Eq + Hash + Clone, 173 | S: BuildHasher + Default, 174 | { 175 | type Output = AHashSet; 176 | 177 | /// Returns the symmetric difference of `self` and `rhs` as a new `AHashSet`. 
178 | /// 179 | /// # Examples 180 | /// 181 | /// ``` 182 | /// use ahash::AHashSet; 183 | /// 184 | /// let a: AHashSet<_> = vec![1, 2, 3].into_iter().collect(); 185 | /// let b: AHashSet<_> = vec![3, 4, 5].into_iter().collect(); 186 | /// 187 | /// let set = &a ^ &b; 188 | /// 189 | /// let mut i = 0; 190 | /// let expected = [1, 2, 4, 5]; 191 | /// for x in &set { 192 | /// assert!(expected.contains(x)); 193 | /// i += 1; 194 | /// } 195 | /// assert_eq!(i, expected.len()); 196 | /// ``` 197 | fn bitxor(self, rhs: &AHashSet) -> AHashSet { 198 | AHashSet(self.0.bitxor(&rhs.0)) 199 | } 200 | } 201 | 202 | impl Sub<&AHashSet> for &AHashSet 203 | where 204 | T: Eq + Hash + Clone, 205 | S: BuildHasher + Default, 206 | { 207 | type Output = AHashSet; 208 | 209 | /// Returns the difference of `self` and `rhs` as a new `AHashSet`. 210 | /// 211 | /// # Examples 212 | /// 213 | /// ``` 214 | /// use ahash::AHashSet; 215 | /// 216 | /// let a: AHashSet<_> = vec![1, 2, 3].into_iter().collect(); 217 | /// let b: AHashSet<_> = vec![3, 4, 5].into_iter().collect(); 218 | /// 219 | /// let set = &a - &b; 220 | /// 221 | /// let mut i = 0; 222 | /// let expected = [1, 2]; 223 | /// for x in &set { 224 | /// assert!(expected.contains(x)); 225 | /// i += 1; 226 | /// } 227 | /// assert_eq!(i, expected.len()); 228 | /// ``` 229 | fn sub(self, rhs: &AHashSet) -> AHashSet { 230 | AHashSet(self.0.sub(&rhs.0)) 231 | } 232 | } 233 | 234 | impl Debug for AHashSet 235 | where 236 | T: Debug, 237 | S: BuildHasher, 238 | { 239 | fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { 240 | self.0.fmt(fmt) 241 | } 242 | } 243 | 244 | impl FromIterator for AHashSet 245 | where 246 | T: Eq + Hash, 247 | { 248 | /// This creates a hashset from the provided iterator using [RandomState::new]. 249 | /// See the documentation in [RandomSource] for notes about key strength. 
250 | #[inline] 251 | fn from_iter>(iter: I) -> AHashSet { 252 | let mut inner = HashSet::with_hasher(RandomState::new()); 253 | inner.extend(iter); 254 | AHashSet(inner) 255 | } 256 | } 257 | 258 | impl<'a, T, S> IntoIterator for &'a AHashSet { 259 | type Item = &'a T; 260 | type IntoIter = hash_set::Iter<'a, T>; 261 | fn into_iter(self) -> Self::IntoIter { 262 | (&self.0).iter() 263 | } 264 | } 265 | 266 | impl IntoIterator for AHashSet { 267 | type Item = T; 268 | type IntoIter = hash_set::IntoIter; 269 | fn into_iter(self) -> Self::IntoIter { 270 | self.0.into_iter() 271 | } 272 | } 273 | 274 | impl Extend for AHashSet 275 | where 276 | T: Eq + Hash, 277 | S: BuildHasher, 278 | { 279 | #[inline] 280 | fn extend>(&mut self, iter: I) { 281 | self.0.extend(iter) 282 | } 283 | } 284 | 285 | impl<'a, T, S> Extend<&'a T> for AHashSet 286 | where 287 | T: 'a + Eq + Hash + Copy, 288 | S: BuildHasher, 289 | { 290 | #[inline] 291 | fn extend>(&mut self, iter: I) { 292 | self.0.extend(iter) 293 | } 294 | } 295 | 296 | /// NOTE: For safety this trait impl is only available available if either of the flags `runtime-rng` (on by default) or 297 | /// `compile-time-rng` are enabled. This is to prevent weakly keyed maps from being accidentally created. Instead one of 298 | /// constructors for [RandomState] must be used. 299 | #[cfg(any(feature = "compile-time-rng", feature = "runtime-rng", feature = "no-rng"))] 300 | impl Default for AHashSet { 301 | /// Creates an empty `AHashSet` with the `Default` value for the hasher. 
/// Deserializes an `AHashSet` by deserializing the wrapped `HashSet` and
/// re-wrapping it.
#[cfg(feature = "serde")]
impl<'de, T> Deserialize<'de> for AHashSet<T>
where
    T: Deserialize<'de> + Eq + Hash,
{
    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        // `Self` is the tuple-struct constructor, so it can be passed to `map` directly.
        HashSet::deserialize(deserializer).map(Self)
    }

    fn deserialize_in_place<D: Deserializer<'de>>(deserializer: D, place: &mut Self) -> Result<(), D::Error> {
        // Hand serde the inner set explicitly instead of relying on deref coercion.
        HashSet::deserialize_in_place(deserializer, &mut place.0)
    }
}
AHash works with `HashMap` to hash keys, but without allowing for the possibility that a malicious user can 5 | //! induce a collision. 6 | //! 7 | //! # How aHash works 8 | //! 9 | //! When it is available aHash uses the hardware AES instructions to provide a keyed hash function. 10 | //! When it is not, aHash falls back on a slightly slower alternative algorithm. 11 | //! 12 | //! Because aHash does not have a fixed standard for its output, it is able to improve over time. 13 | //! But this also means that different computers or computers using different versions of ahash may observe different 14 | //! hash values for the same input. 15 | #![cfg_attr( 16 | all( 17 | feature = "std", 18 | any(feature = "compile-time-rng", feature = "runtime-rng", feature = "no-rng") 19 | ), 20 | doc = r##" 21 | # Basic Usage 22 | AHash provides an implementation of the [Hasher] trait. 23 | To construct a HashMap using aHash as its hasher do the following: 24 | ``` 25 | use ahash::{AHasher, RandomState}; 26 | use std::collections::HashMap; 27 | 28 | let mut map: HashMap<i32, i32, RandomState> = HashMap::default(); 29 | map.insert(12, 34); 30 | ``` 31 | 32 | ### Randomness 33 | 34 | The above requires a source of randomness to generate keys for the hashmap. By default this is obtained from the OS. 35 | It is also possible to have randomness supplied via the `compile-time-rng` flag, or manually. 36 | 37 | ### If randomness is not available 38 | 39 | [AHasher::default()] can be used to hash using fixed keys. This works with 40 | [BuildHasherDefault](std::hash::BuildHasherDefault). 
For example: 41 | 42 | ``` 43 | use std::hash::BuildHasherDefault; 44 | use std::collections::HashMap; 45 | use ahash::AHasher; 46 | 47 | let mut m: HashMap<_, _, BuildHasherDefault<AHasher>> = HashMap::default(); 48 | # m.insert(12, 34); 49 | ``` 50 | It is also possible to instantiate [RandomState] directly: 51 | 52 | ``` 53 | use ahash::HashMap; 54 | use ahash::RandomState; 55 | 56 | let mut m = HashMap::with_hasher(RandomState::with_seed(42)); 57 | # m.insert(1, 2); 58 | ``` 59 | Or for uses besides a hashmap: 60 | ``` 61 | use std::hash::BuildHasher; 62 | use ahash::RandomState; 63 | 64 | let hash_builder = RandomState::with_seed(42); 65 | let hash = hash_builder.hash_one("Some Data"); 66 | ``` 67 | There are several constructors for [RandomState] with different ways to supply seeds. 68 | 69 | # Convenience wrappers 70 | 71 | For convenience, both new-type wrappers and type aliases are provided. 72 | 73 | The new type wrappers are called `AHashMap` and `AHashSet`. 74 | ``` 75 | use ahash::AHashMap; 76 | 77 | let mut map: AHashMap<i32, i32> = AHashMap::new(); 78 | map.insert(12, 34); 79 | ``` 80 | This avoids the need to type "RandomState". (For convenience `From`, `Into`, and `Deref` are provided). 81 | 82 | # Aliases 83 | 84 | For even less typing and better interop with existing libraries (such as rayon) which require a `std::collections::HashMap`, 85 | the type aliases [HashMap], [HashSet] are provided. 86 | 87 | ``` 88 | use ahash::{HashMap, HashMapExt}; 89 | 90 | let mut map: HashMap<i32, i32> = HashMap::new(); 91 | map.insert(12, 34); 92 | ``` 93 | Note the import of [HashMapExt]. This is needed for the constructor. 
94 | 95 | "## 96 | )] 97 | #![deny(clippy::correctness, clippy::complexity, clippy::perf)] 98 | #![allow(clippy::pedantic, clippy::cast_lossless, clippy::unreadable_literal)] 99 | #![cfg_attr(all(not(test), not(feature = "std")), no_std)] 100 | #![cfg_attr(specialize, feature(min_specialization))] 101 | #![cfg_attr(feature = "nightly-arm-aes", feature(stdarch_arm_neon_intrinsics))] 102 | 103 | #[macro_use] 104 | mod convert; 105 | 106 | mod fallback_hash; 107 | 108 | cfg_if::cfg_if! { 109 | if #[cfg(any( 110 | all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), 111 | all(target_arch = "aarch64", target_feature = "aes", not(miri)), 112 | all(feature = "nightly-arm-aes", target_arch = "arm", target_feature = "aes", not(miri)), 113 | ))] { 114 | mod aes_hash; 115 | pub use crate::aes_hash::AHasher; 116 | } else { 117 | pub use crate::fallback_hash::AHasher; 118 | } 119 | } 120 | 121 | cfg_if::cfg_if! { 122 | if #[cfg(feature = "std")] { 123 | mod hash_map; 124 | mod hash_set; 125 | 126 | pub use crate::hash_map::AHashMap; 127 | pub use crate::hash_set::AHashSet; 128 | 129 | /// [Hasher]: std::hash::Hasher 130 | /// [HashMap]: std::collections::HashMap 131 | /// Type alias for [HashMap] 132 | pub type HashMap = std::collections::HashMap; 133 | 134 | /// Type alias for [HashSet] 135 | pub type HashSet = std::collections::HashSet; 136 | } 137 | } 138 | 139 | #[cfg(test)] 140 | mod hash_quality_test; 141 | 142 | mod operations; 143 | pub mod random_state; 144 | mod specialize; 145 | 146 | pub use crate::random_state::RandomState; 147 | 148 | use core::hash::BuildHasher; 149 | 150 | #[cfg(feature = "std")] 151 | /// A convenience trait that can be used together with the type aliases defined to 152 | /// get access to the `new()` and `with_capacity()` methods for the HashMap type alias. 
/// Gives any `std::collections::HashMap` whose hasher builder implements
/// `Default` the `new()` / `with_capacity()` constructors that the standard
/// library only offers for its own default hasher.
#[cfg(feature = "std")]
impl<K, V, S> HashMapExt for std::collections::HashMap<K, V, S>
where
    S: BuildHasher + Default,
{
    /// Constructs an empty map using `S::default()` as the hasher builder.
    fn new() -> Self {
        let hasher = S::default();
        Self::with_hasher(hasher)
    }

    /// Constructs an empty map with the requested initial capacity, using
    /// `S::default()` as the hasher builder.
    fn with_capacity(capacity: usize) -> Self {
        let hasher = S::default();
        Self::with_capacity_and_hasher(capacity, hasher)
    }
}
205 | /// 206 | /// # Example 207 | /// ``` 208 | /// use std::hash::BuildHasherDefault; 209 | /// use ahash::{AHasher, RandomState}; 210 | /// use std::collections::HashMap; 211 | /// 212 | /// let mut map: HashMap> = HashMap::default(); 213 | /// map.insert(12, 34); 214 | /// ``` 215 | /// 216 | /// [BuildHasherDefault]: std::hash::BuildHasherDefault 217 | /// [Hasher]: std::hash::Hasher 218 | /// [HashMap]: std::collections::HashMap 219 | impl Default for AHasher { 220 | /// Constructs a new [AHasher] with fixed keys. 221 | /// If `std` is enabled these will be generated upon first invocation. 222 | /// Otherwise if the `compile-time-rng`feature is enabled these will be generated at compile time. 223 | /// If neither of these features are available, hardcoded constants will be used. 224 | /// 225 | /// Because the values are fixed, different hashers will all hash elements the same way. 226 | /// This could make hash values predictable, if DOS attacks are a concern. If this behaviour is 227 | /// not required, it may be preferable to use [RandomState] instead. 
228 | /// 229 | /// # Examples 230 | /// 231 | /// ``` 232 | /// use ahash::AHasher; 233 | /// use std::hash::Hasher; 234 | /// 235 | /// let mut hasher_1 = AHasher::default(); 236 | /// let mut hasher_2 = AHasher::default(); 237 | /// 238 | /// hasher_1.write_u32(1234); 239 | /// hasher_2.write_u32(1234); 240 | /// 241 | /// assert_eq!(hasher_1.finish(), hasher_2.finish()); 242 | /// ``` 243 | #[inline] 244 | fn default() -> AHasher { 245 | RandomState::with_fixed_keys().build_hasher() 246 | } 247 | } 248 | 249 | // #[inline(never)] 250 | // #[doc(hidden)] 251 | // pub fn hash_test(input: &[u8]) -> u64 { 252 | // let a = RandomState::with_seeds(11, 22, 33, 44); 253 | // <[u8]>::get_hash(input, &a) 254 | // } 255 | 256 | #[cfg(feature = "std")] 257 | #[cfg(test)] 258 | mod test { 259 | use crate::convert::Convert; 260 | use crate::specialize::CallHasher; 261 | use crate::*; 262 | use core::hash::Hash; 263 | use core::hash::Hasher; 264 | use std::collections::HashMap; 265 | 266 | #[test] 267 | fn test_ahash_alias_map_construction() { 268 | let mut map = super::HashMap::with_capacity(1234); 269 | map.insert(1, "test"); 270 | } 271 | 272 | #[test] 273 | fn test_ahash_alias_set_construction() { 274 | let mut set = super::HashSet::with_capacity(1234); 275 | set.insert(1); 276 | } 277 | 278 | #[test] 279 | fn test_default_builder() { 280 | use core::hash::BuildHasherDefault; 281 | 282 | let mut map = HashMap::>::default(); 283 | map.insert(1, 3); 284 | } 285 | 286 | #[test] 287 | fn test_builder() { 288 | let mut map = HashMap::::default(); 289 | map.insert(1, 3); 290 | } 291 | 292 | #[test] 293 | fn test_conversion() { 294 | let input: &[u8] = b"dddddddd"; 295 | let bytes: u64 = as_array!(input, 8).convert(); 296 | assert_eq!(bytes, 0x6464646464646464); 297 | } 298 | 299 | #[test] 300 | fn test_non_zero() { 301 | let mut hasher1 = AHasher::new_with_keys(0, 0); 302 | let mut hasher2 = AHasher::new_with_keys(0, 0); 303 | "foo".hash(&mut hasher1); 304 | "bar".hash(&mut 
hasher2); 305 | assert_ne!(hasher1.finish(), 0); 306 | assert_ne!(hasher2.finish(), 0); 307 | assert_ne!(hasher1.finish(), hasher2.finish()); 308 | 309 | let mut hasher1 = AHasher::new_with_keys(0, 0); 310 | let mut hasher2 = AHasher::new_with_keys(0, 0); 311 | 3_u64.hash(&mut hasher1); 312 | 4_u64.hash(&mut hasher2); 313 | assert_ne!(hasher1.finish(), 0); 314 | assert_ne!(hasher2.finish(), 0); 315 | assert_ne!(hasher1.finish(), hasher2.finish()); 316 | } 317 | 318 | #[test] 319 | fn test_non_zero_specialized() { 320 | let hasher_build = RandomState::with_seeds(0, 0, 0, 0); 321 | 322 | let h1 = str::get_hash("foo", &hasher_build); 323 | let h2 = str::get_hash("bar", &hasher_build); 324 | assert_ne!(h1, 0); 325 | assert_ne!(h2, 0); 326 | assert_ne!(h1, h2); 327 | 328 | let h1 = u64::get_hash(&3_u64, &hasher_build); 329 | let h2 = u64::get_hash(&4_u64, &hasher_build); 330 | assert_ne!(h1, 0); 331 | assert_ne!(h2, 0); 332 | assert_ne!(h1, h2); 333 | } 334 | 335 | #[test] 336 | fn test_ahasher_construction() { 337 | let _ = AHasher::new_with_keys(1234, 5678); 338 | } 339 | 340 | #[test] 341 | fn test_specialize_reference_hash() { 342 | let hasher_build = RandomState::with_seeds(0, 0, 0, 0); 343 | let h1 = hasher_build.hash_one(1u64); 344 | let h2 = hasher_build.hash_one(&1u64); 345 | 346 | assert_eq!(h1, h2); 347 | 348 | let h1 = u64::get_hash(&1_u64, &hasher_build); 349 | let h2 = <&u64>::get_hash(&&1_u64, &hasher_build); 350 | 351 | assert_eq!(h1, h2); 352 | 353 | let h1 = hasher_build.hash_one(1u128); 354 | let h2 = hasher_build.hash_one(&1u128); 355 | 356 | assert_eq!(h1, h2); 357 | } 358 | } 359 | -------------------------------------------------------------------------------- /src/operations.rs: -------------------------------------------------------------------------------- 1 | use crate::convert::*; 2 | #[allow(unused)] 3 | use zerocopy::transmute; 4 | 5 | ///This constant comes from Kunth's prng (Empirically it works better than those from splitmix32). 
/// Multiplies `s` by `by` as a full 64x64 -> 128 bit product and folds the
/// result back to 64 bits by xoring the upper half into the lower half.
///
/// Compiled when the build enables the `folded_multiply` cfg.
#[inline(always)]
#[cfg(folded_multiply)]
pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
    let product = (s as u128).wrapping_mul(by as u128);
    // Truncating cast keeps exactly the low 64 bits.
    let low = product as u64;
    let high = (product >> 64) as u64;
    low ^ high
}

/// Substitute for the 128-bit folded multiply, used when the
/// `folded_multiply` cfg is not set.
///
/// Two wrapping 64-bit multiplications are combined: one of `s` with the
/// byte-swapped `by`, and one of the byte-swapped `s` with `!by` whose result
/// is byte-swapped again before the final xor.
#[inline(always)]
#[cfg(not(folded_multiply))]
pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
    let forward = s.wrapping_mul(by.swap_bytes());
    let backward = s.swap_bytes().wrapping_mul(!by).swap_bytes();
    forward ^ backward
}
31 | /// (order of and non-duplication of bytes is NOT guaranteed) 32 | #[inline(always)] 33 | pub(crate) fn read_small(data: &[u8]) -> [u64; 2] { 34 | debug_assert!(data.len() <= 8); 35 | if data.len() >= 2 { 36 | if data.len() >= 4 { 37 | //len 4-8 38 | [data.read_u32().0 as u64, data.read_last_u32() as u64] 39 | } else { 40 | //len 2-3 41 | [data.read_u16().0 as u64, data[data.len() - 1] as u64] 42 | } 43 | } else { 44 | if data.len() > 0 { 45 | [data[0] as u64, data[0] as u64] 46 | } else { 47 | [0, 0] 48 | } 49 | } 50 | } 51 | 52 | #[inline(always)] 53 | pub(crate) fn shuffle(a: u128) -> u128 { 54 | #[cfg(all(target_feature = "ssse3", not(miri)))] 55 | { 56 | #[cfg(target_arch = "x86")] 57 | use core::arch::x86::*; 58 | #[cfg(target_arch = "x86_64")] 59 | use core::arch::x86_64::*; 60 | unsafe { transmute!(_mm_shuffle_epi8(transmute!(a), transmute!(SHUFFLE_MASK))) } 61 | } 62 | #[cfg(not(all(target_feature = "ssse3", not(miri))))] 63 | { 64 | a.swap_bytes() 65 | } 66 | } 67 | 68 | #[allow(unused)] //not used by fallback 69 | #[inline(always)] 70 | pub(crate) fn add_and_shuffle(a: u128, b: u128) -> u128 { 71 | let sum = add_by_64s(a.convert(), b.convert()); 72 | shuffle(sum.convert()) 73 | } 74 | 75 | #[allow(unused)] //not used by fallback 76 | #[inline(always)] 77 | pub(crate) fn shuffle_and_add(base: u128, to_add: u128) -> u128 { 78 | let shuffled: [u64; 2] = shuffle(base).convert(); 79 | add_by_64s(shuffled, to_add.convert()).convert() 80 | } 81 | 82 | #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri)))] 83 | #[inline(always)] 84 | pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] { 85 | unsafe { 86 | #[cfg(target_arch = "x86")] 87 | use core::arch::x86::*; 88 | #[cfg(target_arch = "x86_64")] 89 | use core::arch::x86_64::*; 90 | transmute!(_mm_add_epi64(transmute!(a), transmute!(b))) 91 | } 92 | } 93 | 94 | #[cfg(not(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", 
/// ARM counterpart of the x86 `aesenc` helper: applies one AES encryption
/// round to `value` and xors `xor` into the result.
///
/// The ARM `AESE` instruction (`vaeseq_u8`) xors its key operand into the
/// state *before* the SubBytes/ShiftRows steps, whereas x86 `AESENC` xors the
/// round key *after* MixColumns. To get the x86 ordering, the round is run
/// with an all-zero key and `xor` is applied afterwards.
#[cfg(any(
    all(target_arch = "aarch64", target_feature = "aes", not(miri)),
    all(feature = "nightly-arm-aes", target_arch = "arm", target_feature = "aes", not(miri)),
))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "aarch64")]
    use core::arch::aarch64::*;
    #[cfg(target_arch = "arm")]
    use core::arch::arm::*;
    // AESE with a zero key, followed by AESMC (MixColumns).
    // The cfg gate above guarantees the `aes` target feature is enabled.
    let res = unsafe { vaesmcq_u8(vaeseq_u8(transmute!(value), transmute!(0u128))) };
    let value: u128 = transmute!(res);
    // Apply the "round key" last, matching the x86 instruction.
    xor ^ value
}
/// Adds `len` (wrapping) to one 64-bit half of `enc`, leaving the other half
/// untouched; no carry crosses between the halves.
///
/// On x86_64 with SSE2 this is a single lane-wise 64-bit add where `len`
/// occupies the low lane and the high lane is zero. Elsewhere the value is
/// split into `[u64; 2]` via `convert()` and `len` is added to element 0.
#[allow(unused)]
#[inline(always)]
pub(crate) fn add_in_length(enc: &mut u128, len: u64) {
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
    {
        use core::arch::x86_64::*;

        let target: *mut u128 = enc;
        // SAFETY: `target` comes from a live `&mut u128`, so it is valid for a
        // 16-byte read and write; the unaligned load/store variants are used,
        // and the cfg gate above guarantees SSE2 is available.
        unsafe {
            let addend = _mm_cvtsi64_si128(len as i64);
            let current = _mm_loadu_si128(target.cast());
            _mm_storeu_si128(target.cast(), _mm_add_epi64(current, addend));
        }
    }
    #[cfg(not(all(target_arch = "x86_64", target_feature = "sse2", not(miri))))]
    {
        let mut halves: [u64; 2] = enc.convert();
        halves[0] = halves[0].wrapping_add(len);
        *enc = halves.convert();
    }
}
{ static MASK: Cell = Cell::new(0); } 191 | // 192 | // fn shuffle(a: u128) -> u128 { 193 | // use std::intrinsics::transmute; 194 | // #[cfg(target_arch = "x86")] 195 | // use core::arch::x86::*; 196 | // #[cfg(target_arch = "x86_64")] 197 | // use core::arch::x86_64::*; 198 | // MASK.with(|mask| { 199 | // unsafe { transmute!(_mm_shuffle_epi8(transmute!(a), transmute!(mask.get()))) } 200 | // }) 201 | // } 202 | // 203 | // #[test] 204 | // fn find_shuffle() { 205 | // use rand::prelude::*; 206 | // use SliceRandom; 207 | // use std::panic; 208 | // use std::io::Write; 209 | // 210 | // let mut value: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ,13, 14, 15]; 211 | // let mut rand = thread_rng(); 212 | // let mut successful_list = HashMap::new(); 213 | // for _attempt in 0..10000000 { 214 | // rand.shuffle(&mut value); 215 | // let test_val = value.convert(); 216 | // MASK.with(|mask| { 217 | // mask.set(test_val); 218 | // }); 219 | // if let Ok(successful) = panic::catch_unwind(|| { 220 | // test_shuffle_does_not_collide_with_aes(); 221 | // test_shuffle_moves_high_bits(); 222 | // test_shuffle_moves_every_value(); 223 | // //test_shuffle_does_not_loop(); 224 | // value 225 | // }) { 226 | // let successful: u128 = successful.convert(); 227 | // successful_list.insert(successful, iters_before_loop()); 228 | // } 229 | // } 230 | // let write_file = File::create("/tmp/output").unwrap(); 231 | // let mut writer = BufWriter::new(&write_file); 232 | // 233 | // for success in successful_list { 234 | // writeln!(writer, "Found successful: {:x?} - {:?}", success.0, success.1); 235 | // } 236 | // } 237 | // 238 | // fn iters_before_loop() -> u32 { 239 | // let numbered = 0x00112233_44556677_8899AABB_CCDDEEFF; 240 | // let mut shuffled = shuffle(numbered); 241 | // let mut count = 0; 242 | // loop { 243 | // // println!("{:>16x}", shuffled); 244 | // if numbered == shuffled { 245 | // break; 246 | // } 247 | // count += 1; 248 | // shuffled = 
/// Checks that `shuffle` never moves a byte to a position that a single AES
/// round (forward or inverse) already spreads that byte to.
///
/// For each of the 16 byte positions, a block with only that byte set is run
/// through `aesenc`, `aesdec` and `shuffle`; wherever `shuffle` put the byte,
/// both AES outputs must be zero.
#[cfg(all(
    any(target_arch = "x86", target_arch = "x86_64"),
    target_feature = "ssse3",
    target_feature = "aes",
    not(miri)
))]
#[test]
fn test_shuffle_does_not_collide_with_aes() {
    let mut value: [u8; 16] = [0; 16];
    // Round output for the all-zero block. Using it as the xor operand below
    // cancels the constant part of the round.
    // NOTE(review): presumably this leaves non-zero bytes only at positions
    // influenced by the single set input byte -- confirm.
    let zero_mask_enc = aesenc(0, 0);
    let zero_mask_dec = aesdec(0, 0);
    for index in 0..16 {
        // Probe one input byte at a time.
        value[index] = 1;
        let excluded_positions_enc: [u8; 16] = aesenc(value.convert(), zero_mask_enc).convert();
        let excluded_positions_dec: [u8; 16] = aesdec(value.convert(), zero_mask_dec).convert();
        let actual_location: [u8; 16] = shuffle(value.convert()).convert();
        for pos in 0..16 {
            // `pos` is where shuffle placed the probed byte.
            if actual_location[pos] != 0 {
                assert_eq!(
                    0, excluded_positions_enc[pos],
                    "Forward Overlap between {:?} and {:?} at {}",
                    excluded_positions_enc, actual_location, index
                );
                assert_eq!(
                    0, excluded_positions_dec[pos],
                    "Reverse Overlap between {:?} and {:?} at {}",
                    excluded_positions_dec, actual_location, index
                );
            }
        }
        // Reset so the next iteration again has exactly one byte set.
        value[index] = 0;
    }
}
other half {:?} -> {:?}", 312 | 0, 313 | shuffle(1) 314 | ); 315 | 316 | assert!( 317 | shuffle(1_u128 << 58) >= (1_u128 << 64), 318 | "High bits must be moved to other half {:?} -> {:?}", 319 | 7, 320 | shuffle(1_u128 << 58) 321 | ); 322 | assert!( 323 | shuffle(1_u128 << 58) < (1_u128 << 112), 324 | "High bits must not remain high {:?} -> {:?}", 325 | 7, 326 | shuffle(1_u128 << 58) 327 | ); 328 | assert!( 329 | shuffle(1_u128 << 64) < (1_u128 << 64), 330 | "Low bits must be moved to other half {:?} -> {:?}", 331 | 8, 332 | shuffle(1_u128 << 64) 333 | ); 334 | assert!( 335 | shuffle(1_u128 << 64) >= (1_u128 << 16), 336 | "Low bits must not remain low {:?} -> {:?}", 337 | 8, 338 | shuffle(1_u128 << 64) 339 | ); 340 | 341 | assert!( 342 | shuffle(1_u128 << 120) < (1_u128 << 50), 343 | "High bits must be moved to low half {:?} -> {:?}", 344 | 15, 345 | shuffle(1_u128 << 120) 346 | ); 347 | } 348 | 349 | #[cfg(all( 350 | any(target_arch = "x86", target_arch = "x86_64"), 351 | target_feature = "ssse3", 352 | not(miri) 353 | ))] 354 | #[test] 355 | fn test_shuffle_does_not_loop() { 356 | let numbered = 0x00112233_44556677_8899AABB_CCDDEEFF; 357 | let mut shuffled = shuffle(numbered); 358 | for count in 0..100 { 359 | // println!("{:>16x}", shuffled); 360 | assert_ne!(numbered, shuffled, "Equal after {} vs {:x}", count, shuffled); 361 | shuffled = shuffle(shuffled); 362 | } 363 | } 364 | 365 | #[test] 366 | fn test_add_length() { 367 | let enc : [u64; 2] = [50, u64::MAX]; 368 | let mut enc : u128 = enc.convert(); 369 | add_in_length(&mut enc, u64::MAX); 370 | let enc : [u64; 2] = enc.convert(); 371 | assert_eq!(enc[1], u64::MAX); 372 | assert_eq!(enc[0], 49); 373 | } 374 | } 375 | -------------------------------------------------------------------------------- /src/random_state.rs: -------------------------------------------------------------------------------- 1 | use core::hash::Hash; 2 | cfg_if::cfg_if! 
{
    if #[cfg(any(
        all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)),
        all(target_arch = "aarch64", target_feature = "aes", not(miri)),
        all(feature = "nightly-arm-aes", target_arch = "arm", target_feature = "aes", not(miri)),
    ))] {
        use crate::aes_hash::*;
    } else {
        use crate::fallback_hash::*;
    }
}
cfg_if::cfg_if! {
    if #[cfg(feature = "std")] {
        extern crate std as alloc;
    } else {
        extern crate alloc;
    }
}

#[cfg(feature = "atomic-polyfill")]
use portable_atomic as atomic;
#[cfg(not(feature = "atomic-polyfill"))]
use core::sync::atomic;

use alloc::boxed::Box;
use atomic::{AtomicUsize, Ordering};
use core::any::{Any, TypeId};
use core::fmt;
use core::hash::BuildHasher;
use core::hash::Hasher;

/// Fixed constants used as the first seed set when neither RNG feature supplies one
/// (see the last branch of `get_fixed_seeds`).
pub(crate) const PI: [u64; 4] = [
    0x243f_6a88_85a3_08d3,
    0x1319_8a2e_0370_7344,
    0xa409_3822_299f_31d0,
    0x082e_fa98_ec4e_6c89,
];

/// Fixed constants used as the second seed set when neither RNG feature supplies one;
/// also XORed into the caller's keys by `RandomState::with_seeds`.
pub(crate) const PI2: [u64; 4] = [
    0x4528_21e6_38d0_1377,
    0xbe54_66cf_34e9_0c6c,
    0xc0ac_29b7_c97c_50dd,
    0x3f84_d5b5_b547_0917,
];

cfg_if::cfg_if! {
    // Compile-time seeds are used whenever `compile-time-rng` is enabled and either
    //  * this is a test or fuzzing build (takes priority over `runtime-rng`), or
    //  * `runtime-rng` is not enabled.
    // Runtime seeds are used for `runtime-rng` outside of fuzzing; otherwise the fixed
    // `PI` / `PI2` tables are returned.
    if #[cfg(all(
        feature = "compile-time-rng",
        any(test, fuzzing, not(feature = "runtime-rng"))
    ))] {
        /// Returns two sets of four seeds produced by `const_random!` at compile time.
        #[inline]
        fn get_fixed_seeds() -> &'static [[u64; 4]; 2] {
            use const_random::const_random;

            const RAND: [[u64; 4]; 2] = [
                [
                    const_random!(u64),
                    const_random!(u64),
                    const_random!(u64),
                    const_random!(u64),
                ], [
                    const_random!(u64),
                    const_random!(u64),
                    const_random!(u64),
                    const_random!(u64),
                ]
            ];
            &RAND
        }
    } else if #[cfg(all(feature = "runtime-rng", not(fuzzing)))] {
        /// Returns two sets of four seeds filled from `getrandom` on first use and cached
        /// in a `OnceBox` afterwards.
        ///
        /// # Panics
        /// Panics if `getrandom::fill` reports an error.
        #[inline]
        fn get_fixed_seeds() -> &'static [[u64; 4]; 2] {
            use crate::convert::Convert;

            static SEEDS: OnceBox<[[u64; 4]; 2]> = OnceBox::new();

            SEEDS.get_or_init(|| {
                let mut result: [u8; 64] = [0; 64];
                getrandom::fill(&mut result).expect("getrandom::fill() failed.");
                Box::new(result.convert())
            })
        }
    } else {
        /// Returns the fixed `PI` / `PI2` tables.
        #[inline]
        fn get_fixed_seeds() -> &'static [[u64; 4]; 2] {
            &[PI, PI2]
        }
    }
}

cfg_if::cfg_if! {
    if #[cfg(not(all(target_arch = "arm", target_os = "none")))] {
        use once_cell::race::OnceBox;

        // Set at most once: either by `set_random_source` or lazily by `get_src`.
        static RAND_SOURCE: OnceBox<Box<dyn RandomSource + Send + Sync + 'static>> = OnceBox::new();
    }
}
/// A supplier of Randomness used for different hashers.
/// See [set_random_source].
///
/// If [set_random_source] is not called aHash will default to the best available source of randomness.
/// In order this is:
/// 1.
OS provided random number generator (available if the `runtime-rng` flag is enabled which it is by default) - This should be very strong. 122 | /// 2. Strong compile time random numbers used to permute a static "counter". (available if `compile-time-rng` is enabled. 123 | /// __Enabling this is recommended if `runtime-rng` is not possible__) 124 | /// 3. A static counter that adds the memory address of each [RandomState] created permuted with fixed constants. 125 | /// (Similar to above but with fixed keys) - This is the weakest option. The strength of this heavily depends on whether or not ASLR is enabled. 126 | /// (Rust enables ASLR by default) 127 | pub trait RandomSource { 128 | fn gen_hasher_seed(&self) -> usize; 129 | } 130 | 131 | struct DefaultRandomSource { 132 | counter: AtomicUsize, 133 | } 134 | 135 | impl DefaultRandomSource { 136 | fn new() -> DefaultRandomSource { 137 | DefaultRandomSource { 138 | counter: AtomicUsize::new(&PI as *const _ as usize), 139 | } 140 | } 141 | 142 | #[cfg(all(target_arch = "arm", target_os = "none"))] 143 | const fn default() -> DefaultRandomSource { 144 | DefaultRandomSource { 145 | counter: AtomicUsize::new(PI[3] as usize), 146 | } 147 | } 148 | } 149 | 150 | impl RandomSource for DefaultRandomSource { 151 | cfg_if::cfg_if! { 152 | if #[cfg(all(target_arch = "arm", target_os = "none"))] { 153 | fn gen_hasher_seed(&self) -> usize { 154 | let stack = self as *const _ as usize; 155 | let previous = self.counter.load(Ordering::Relaxed); 156 | let new = previous.wrapping_add(stack); 157 | self.counter.store(new, Ordering::Relaxed); 158 | new 159 | } 160 | } else { 161 | fn gen_hasher_seed(&self) -> usize { 162 | let stack = self as *const _ as usize; 163 | self.counter.fetch_add(stack, Ordering::Relaxed) 164 | } 165 | } 166 | } 167 | } 168 | 169 | cfg_if::cfg_if! 
{ 170 | if #[cfg(all(target_arch = "arm", target_os = "none"))] { 171 | #[inline] 172 | fn get_src() -> &'static dyn RandomSource { 173 | static RAND_SOURCE: DefaultRandomSource = DefaultRandomSource::default(); 174 | &RAND_SOURCE 175 | } 176 | } else { 177 | /// Provides an optional way to manually supply a source of randomness for Hasher keys. 178 | /// 179 | /// The provided [RandomSource] will be used to be used as a source of randomness by [RandomState] to generate new states. 180 | /// If this method is not invoked the standard source of randomness is used as described in the Readme. 181 | /// 182 | /// The source of randomness can only be set once, and must be set before the first RandomState is created. 183 | /// If the source has already been specified `Err` is returned with a `bool` indicating if the set failed because 184 | /// method was previously invoked (true) or if the default source is already being used (false). 185 | #[cfg(not(all(target_arch = "arm", target_os = "none")))] 186 | pub fn set_random_source(source: impl RandomSource + Send + Sync + 'static) -> Result<(), bool> { 187 | RAND_SOURCE.set(Box::new(Box::new(source))).map_err(|s| s.as_ref().type_id() != TypeId::of::<&DefaultRandomSource>()) 188 | } 189 | 190 | #[inline] 191 | fn get_src() -> &'static dyn RandomSource { 192 | RAND_SOURCE.get_or_init(|| Box::new(Box::new(DefaultRandomSource::new()))).as_ref() 193 | } 194 | } 195 | } 196 | 197 | /// Provides a [Hasher] factory. This is typically used (e.g. by [HashMap]) to create 198 | /// [AHasher]s in order to hash the keys of the map. See `build_hasher` below. 199 | /// 200 | /// [build_hasher]: ahash:: 201 | /// [Hasher]: std::hash::Hasher 202 | /// [BuildHasher]: std::hash::BuildHasher 203 | /// [HashMap]: std::collections::HashMap 204 | /// 205 | /// There are multiple constructors each is documented in more detail below: 206 | /// 207 | /// | Constructor | Dynamically random? 
| Seed | 208 | /// |---------------|---------------------|------| 209 | /// |`new` | Each instance unique|_[RandomSource]_| 210 | /// |`generate_with`| Each instance unique|`u64` x 4 + [RandomSource]| 211 | /// |`with_seed` | Fixed per process |`u64` + static random number| 212 | /// |`with_seeds` | Fixed |`u64` x 4| 213 | /// 214 | #[derive(Clone)] 215 | pub struct RandomState { 216 | pub(crate) k0: u64, 217 | pub(crate) k1: u64, 218 | pub(crate) k2: u64, 219 | pub(crate) k3: u64, 220 | } 221 | 222 | impl fmt::Debug for RandomState { 223 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 224 | f.pad("RandomState { .. }") 225 | } 226 | } 227 | 228 | impl RandomState { 229 | /// Create a new `RandomState` `BuildHasher` using random keys. 230 | /// 231 | /// Each instance will have a unique set of keys derived from [RandomSource]. 232 | /// 233 | #[inline] 234 | pub fn new() -> RandomState { 235 | let src = get_src(); 236 | let fixed = get_fixed_seeds(); 237 | Self::from_keys(&fixed[0], &fixed[1], src.gen_hasher_seed()) 238 | } 239 | 240 | /// Create a new `RandomState` `BuildHasher` based on the provided seeds, but in such a way 241 | /// that each time it is called the resulting state will be different and of high quality. 242 | /// This allows fixed constant or poor quality seeds to be provided without the problem of different 243 | /// `BuildHasher`s being identical or weak. 244 | /// 245 | /// This is done via permuting the provided values with the value of a static counter and memory address. 246 | /// (This makes this method somewhat more expensive than `with_seeds` below which does not do this). 247 | /// 248 | /// The provided values (k0-k3) do not need to be of high quality but they should not all be the same value. 
249 | #[inline] 250 | pub fn generate_with(k0: u64, k1: u64, k2: u64, k3: u64) -> RandomState { 251 | let src = get_src(); 252 | let fixed = get_fixed_seeds(); 253 | RandomState::from_keys(&fixed[0], &[k0, k1, k2, k3], src.gen_hasher_seed()) 254 | } 255 | 256 | fn from_keys(a: &[u64; 4], b: &[u64; 4], c: usize) -> RandomState { 257 | let &[k0, k1, k2, k3] = a; 258 | let mut hasher = AHasher::from_random_state(&RandomState { k0, k1, k2, k3 }); 259 | hasher.write_usize(c); 260 | let mix = |l: u64, r: u64| { 261 | let mut h = hasher.clone(); 262 | h.write_u64(l); 263 | h.write_u64(r); 264 | h.finish() 265 | }; 266 | RandomState { 267 | k0: mix(b[0], b[2]), 268 | k1: mix(b[1], b[3]), 269 | k2: mix(b[2], b[1]), 270 | k3: mix(b[3], b[0]), 271 | } 272 | } 273 | 274 | /// Internal. Used by Default. 275 | #[inline] 276 | pub(crate) fn with_fixed_keys() -> RandomState { 277 | let [k0, k1, k2, k3] = get_fixed_seeds()[0]; 278 | RandomState { k0, k1, k2, k3 } 279 | } 280 | 281 | /// Build a `RandomState` from a single key. The provided key does not need to be of high quality, 282 | /// but all `RandomState`s created from the same key will produce identical hashers. 283 | /// (In contrast to `generate_with` above) 284 | /// 285 | /// This allows for explicitly setting the seed to be used. 286 | /// 287 | /// Note: This method does not require the provided seed to be strong. 288 | #[inline] 289 | pub fn with_seed(key: usize) -> RandomState { 290 | let fixed = get_fixed_seeds(); 291 | RandomState::from_keys(&fixed[0], &fixed[1], key) 292 | } 293 | 294 | /// Allows for explicitly setting the seeds to used. 295 | /// All `RandomState`s created with the same set of keys key will produce identical hashers. 296 | /// (In contrast to `generate_with` above) 297 | /// 298 | /// Note: If DOS resistance is desired one of these should be a decent quality random number. 
299 | /// If 4 high quality random number are not cheaply available this method is robust against 0s being passed for 300 | /// one or more of the parameters or the same value being passed for more than one parameter. 301 | /// It is recommended to pass numbers in order from highest to lowest quality (if there is any difference). 302 | #[inline] 303 | pub const fn with_seeds(k0: u64, k1: u64, k2: u64, k3: u64) -> RandomState { 304 | RandomState { 305 | k0: k0 ^ PI2[0], 306 | k1: k1 ^ PI2[1], 307 | k2: k2 ^ PI2[2], 308 | k3: k3 ^ PI2[3], 309 | } 310 | } 311 | 312 | /// Calculates the hash of a single value. This provides a more convenient (and faster) way to obtain a hash: 313 | /// For example: 314 | #[cfg_attr( 315 | feature = "std", 316 | doc = r##" # Examples 317 | ``` 318 | use std::hash::BuildHasher; 319 | use ahash::RandomState; 320 | 321 | let hash_builder = RandomState::new(); 322 | let hash = hash_builder.hash_one("Some Data"); 323 | ``` 324 | "## 325 | )] 326 | /// This is similar to: 327 | #[cfg_attr( 328 | feature = "std", 329 | doc = r##" # Examples 330 | ``` 331 | use std::hash::{BuildHasher, Hash, Hasher}; 332 | use ahash::RandomState; 333 | 334 | let hash_builder = RandomState::new(); 335 | let mut hasher = hash_builder.build_hasher(); 336 | "Some Data".hash(&mut hasher); 337 | let hash = hasher.finish(); 338 | ``` 339 | "## 340 | )] 341 | /// (Note that these two ways to get a hash may not produce the same value for the same data) 342 | /// 343 | /// This is intended as a convenience for code which *consumes* hashes, such 344 | /// as the implementation of a hash table or in unit tests that check 345 | /// whether a custom [`Hash`] implementation behaves as expected. 346 | /// 347 | /// This must not be used in any code which *creates* hashes, such as in an 348 | /// implementation of [`Hash`]. 
The way to create a combined hash of 349 | /// multiple values is to call [`Hash::hash`] multiple times using the same 350 | /// [`Hasher`], not to call this method repeatedly and combine the results. 351 | #[inline] 352 | pub fn hash_one(&self, x: T) -> u64 353 | where 354 | Self: Sized, 355 | { 356 | use crate::specialize::CallHasher; 357 | T::get_hash(&x, self) 358 | } 359 | } 360 | 361 | /// Creates an instance of RandomState using keys obtained from the random number generator. 362 | /// Each instance created in this way will have a unique set of keys. (But the resulting instance 363 | /// can be used to create many hashers each or which will have the same keys.) 364 | /// 365 | /// This is the same as [RandomState::new()] 366 | /// 367 | /// NOTE: For safety this trait impl is only available available if either of the flags `runtime-rng` (on by default) or 368 | /// `compile-time-rng` are enabled. This is to prevent weakly keyed maps from being accidentally created. Instead one of 369 | /// constructors for [RandomState] must be used. 370 | #[cfg(any(feature = "compile-time-rng", feature = "runtime-rng", feature = "no-rng"))] 371 | impl Default for RandomState { 372 | #[inline] 373 | fn default() -> Self { 374 | Self::new() 375 | } 376 | } 377 | 378 | impl BuildHasher for RandomState { 379 | type Hasher = AHasher; 380 | 381 | /// Constructs a new [AHasher] with keys based on this [RandomState] object. 382 | /// This means that two different [RandomState]s will will generate 383 | /// [AHasher]s that will return different hashcodes, but [Hasher]s created from the same [BuildHasher] 384 | /// will generate the same hashes for the same input data. 
385 | /// 386 | #[cfg_attr( 387 | feature = "std", 388 | doc = r##" # Examples 389 | ``` 390 | use ahash::{AHasher, RandomState}; 391 | use std::hash::{Hasher, BuildHasher}; 392 | 393 | let build_hasher = RandomState::new(); 394 | let mut hasher_1 = build_hasher.build_hasher(); 395 | let mut hasher_2 = build_hasher.build_hasher(); 396 | 397 | hasher_1.write_u32(1234); 398 | hasher_2.write_u32(1234); 399 | 400 | assert_eq!(hasher_1.finish(), hasher_2.finish()); 401 | 402 | let other_build_hasher = RandomState::new(); 403 | let mut different_hasher = other_build_hasher.build_hasher(); 404 | different_hasher.write_u32(1234); 405 | assert_ne!(different_hasher.finish(), hasher_1.finish()); 406 | ``` 407 | "## 408 | )] 409 | /// [Hasher]: std::hash::Hasher 410 | /// [BuildHasher]: std::hash::BuildHasher 411 | /// [HashMap]: std::collections::HashMap 412 | #[inline] 413 | fn build_hasher(&self) -> AHasher { 414 | AHasher::from_random_state(self) 415 | } 416 | 417 | /// Calculates the hash of a single value. 
This provides a more convenient (and faster) way to obtain a hash:
    /// For example:
    #[cfg_attr(
        feature = "std",
        doc = r##" # Examples
```
    use std::hash::BuildHasher;
    use ahash::RandomState;

    let hash_builder = RandomState::new();
    let hash = hash_builder.hash_one("Some Data");
```
    "##
    )]
    /// This is similar to:
    #[cfg_attr(
        feature = "std",
        doc = r##" # Examples
```
    use std::hash::{BuildHasher, Hash, Hasher};
    use ahash::RandomState;

    let hash_builder = RandomState::new();
    let mut hasher = hash_builder.build_hasher();
    "Some Data".hash(&mut hasher);
    let hash = hasher.finish();
```
    "##
    )]
    /// (Note that these two ways to get a hash may not produce the same value for the same data)
    ///
    /// This is intended as a convenience for code which *consumes* hashes, such
    /// as the implementation of a hash table or in unit tests that check
    /// whether a custom [`Hash`] implementation behaves as expected.
    ///
    /// This must not be used in any code which *creates* hashes, such as in an
    /// implementation of [`Hash`]. The way to create a combined hash of
    /// multiple values is to call [`Hash::hash`] multiple times using the same
    /// [`Hasher`], not to call this method repeatedly and combine the results.
    #[cfg(specialize)]
    #[inline]
    fn hash_one<T: Hash>(&self, x: T) -> u64 {
        // Forward to the inherent method of the same name.
        RandomState::hash_one(self, x)
    }
}

#[cfg(specialize)]
impl RandomState {
    /// Hashes `value` with an `AHasherU64` initialised from `k1` (buffer) and `k0` (pad).
    #[inline]
    pub(crate) fn hash_as_u64<T: Hash + ?Sized>(&self, value: &T) -> u64 {
        let mut hasher = AHasherU64 {
            buffer: self.k1,
            pad: self.k0,
        };
        value.hash(&mut hasher);
        hasher.finish()
    }

    /// Hashes `value` with an `AHasherFixed` wrapped around a hasher built from this state.
    #[inline]
    pub(crate) fn hash_as_fixed_length<T: Hash + ?Sized>(&self, value: &T) -> u64 {
        let mut hasher = AHasherFixed(self.build_hasher());
        value.hash(&mut hasher);
        hasher.finish()
    }

    /// Hashes `value` with an `AHasherStr` wrapped around a hasher built from this state.
    #[inline]
    pub(crate) fn hash_as_str<T: Hash + ?Sized>(&self, value: &T) -> u64 {
        let mut hasher = AHasherStr(self.build_hasher());
        value.hash(&mut hasher);
        hasher.finish()
    }
}

#[cfg(test)]
mod test {
    use super::*;

    /// Two states generated from identical seeds must still produce different hashers.
    #[test]
    fn test_unique() {
        let a = RandomState::generate_with(1, 2, 3, 4);
        let b = RandomState::generate_with(1, 2, 3, 4);
        assert_ne!(a.build_hasher().finish(), b.build_hasher().finish());
    }

    /// With runtime seeds in use the first seed set must differ from `PI`.
    #[cfg(all(feature = "runtime-rng", not(all(feature = "compile-time-rng", test))))]
    #[test]
    fn test_not_pi() {
        assert_ne!(PI, get_fixed_seeds()[0]);
    }

    /// With compile-time seeds in use the first seed set must differ from `PI`.
    #[cfg(all(feature = "compile-time-rng", any(not(feature = "runtime-rng"), test)))]
    #[test]
    fn test_not_pi_const() {
        assert_ne!(PI, get_fixed_seeds()[0]);
    }

    /// Without any RNG feature the first seed set is exactly `PI`.
    #[cfg(all(not(feature = "runtime-rng"), not(feature = "compile-time-rng")))]
    #[test]
    fn test_pi() {
        assert_eq!(PI, get_fixed_seeds()[0]);
    }

    /// `with_seeds` must be usable in a `const` context.
    #[test]
    fn test_with_seeds_const() {
        const _CONST_RANDOM_STATE: RandomState = RandomState::with_seeds(17, 19, 21, 23);
    }
}
--------------------------------------------------------------------------------
/src/specialize.rs:
-------------------------------------------------------------------------------- 1 | use crate::RandomState; 2 | use core::hash::BuildHasher; 3 | use core::hash::Hash; 4 | use core::hash::Hasher; 5 | 6 | #[cfg(not(feature = "std"))] 7 | extern crate alloc; 8 | #[cfg(feature = "std")] 9 | extern crate std as alloc; 10 | 11 | #[cfg(specialize)] 12 | use alloc::string::String; 13 | #[cfg(specialize)] 14 | use alloc::vec::Vec; 15 | 16 | /// Provides a way to get an optimized hasher for a given data type. 17 | /// Rather than using a Hasher generically which can hash any value, this provides a way to get a specialized hash 18 | /// for a specific type. So this may be faster for primitive types. 19 | pub(crate) trait CallHasher { 20 | fn get_hash(value: &H, random_state: &RandomState) -> u64; 21 | } 22 | 23 | #[cfg(not(specialize))] 24 | impl CallHasher for T 25 | where 26 | T: Hash + ?Sized, 27 | { 28 | #[inline] 29 | fn get_hash(value: &H, random_state: &RandomState) -> u64 { 30 | let mut hasher = random_state.build_hasher(); 31 | value.hash(&mut hasher); 32 | hasher.finish() 33 | } 34 | } 35 | 36 | #[cfg(specialize)] 37 | impl CallHasher for T 38 | where 39 | T: Hash + ?Sized, 40 | { 41 | #[inline] 42 | default fn get_hash(value: &H, random_state: &RandomState) -> u64 { 43 | let mut hasher = random_state.build_hasher(); 44 | value.hash(&mut hasher); 45 | hasher.finish() 46 | } 47 | } 48 | 49 | macro_rules! 
call_hasher_impl_u64 { 50 | ($typ:ty) => { 51 | #[cfg(specialize)] 52 | impl CallHasher for $typ { 53 | #[inline] 54 | fn get_hash(value: &H, random_state: &RandomState) -> u64 { 55 | random_state.hash_as_u64(value) 56 | } 57 | } 58 | }; 59 | } 60 | call_hasher_impl_u64!(u8); 61 | call_hasher_impl_u64!(u16); 62 | call_hasher_impl_u64!(u32); 63 | call_hasher_impl_u64!(u64); 64 | call_hasher_impl_u64!(i8); 65 | call_hasher_impl_u64!(i16); 66 | call_hasher_impl_u64!(i32); 67 | call_hasher_impl_u64!(i64); 68 | call_hasher_impl_u64!(&u8); 69 | call_hasher_impl_u64!(&u16); 70 | call_hasher_impl_u64!(&u32); 71 | call_hasher_impl_u64!(&u64); 72 | call_hasher_impl_u64!(&i8); 73 | call_hasher_impl_u64!(&i16); 74 | call_hasher_impl_u64!(&i32); 75 | call_hasher_impl_u64!(&i64); 76 | 77 | macro_rules! call_hasher_impl_fixed_length{ 78 | ($typ:ty) => { 79 | #[cfg(specialize)] 80 | impl CallHasher for $typ { 81 | #[inline] 82 | fn get_hash(value: &H, random_state: &RandomState) -> u64 { 83 | random_state.hash_as_fixed_length(value) 84 | } 85 | } 86 | }; 87 | } 88 | 89 | call_hasher_impl_fixed_length!(u128); 90 | call_hasher_impl_fixed_length!(i128); 91 | call_hasher_impl_fixed_length!(usize); 92 | call_hasher_impl_fixed_length!(isize); 93 | call_hasher_impl_fixed_length!(&u128); 94 | call_hasher_impl_fixed_length!(&i128); 95 | call_hasher_impl_fixed_length!(&usize); 96 | call_hasher_impl_fixed_length!(&isize); 97 | 98 | #[cfg(specialize)] 99 | impl CallHasher for [u8] { 100 | #[inline] 101 | fn get_hash(value: &H, random_state: &RandomState) -> u64 { 102 | random_state.hash_as_str(value) 103 | } 104 | } 105 | 106 | #[cfg(specialize)] 107 | impl CallHasher for Vec { 108 | #[inline] 109 | fn get_hash(value: &H, random_state: &RandomState) -> u64 { 110 | random_state.hash_as_str(value) 111 | } 112 | } 113 | 114 | #[cfg(specialize)] 115 | impl CallHasher for str { 116 | #[inline] 117 | fn get_hash(value: &H, random_state: &RandomState) -> u64 { 118 | random_state.hash_as_str(value) 
119 | } 120 | } 121 | 122 | #[cfg(all(specialize))] 123 | impl CallHasher for String { 124 | #[inline] 125 | fn get_hash(value: &H, random_state: &RandomState) -> u64 { 126 | random_state.hash_as_str(value) 127 | } 128 | } 129 | 130 | #[cfg(test)] 131 | mod test { 132 | use super::*; 133 | use crate::*; 134 | 135 | #[test] 136 | #[cfg(specialize)] 137 | pub fn test_specialized_invoked() { 138 | let build_hasher = RandomState::with_seeds(1, 2, 3, 4); 139 | let shortened = u64::get_hash(&0, &build_hasher); 140 | let mut hasher = AHasher::new_with_keys(1, 2); 141 | 0_u64.hash(&mut hasher); 142 | assert_ne!(hasher.finish(), shortened); 143 | } 144 | 145 | /// Tests that some non-trivial transformation takes place. 146 | #[test] 147 | pub fn test_input_processed() { 148 | let build_hasher = RandomState::with_seeds(2, 2, 2, 2); 149 | assert_ne!(0, u64::get_hash(&0, &build_hasher)); 150 | assert_ne!(1, u64::get_hash(&0, &build_hasher)); 151 | assert_ne!(2, u64::get_hash(&0, &build_hasher)); 152 | assert_ne!(3, u64::get_hash(&0, &build_hasher)); 153 | assert_ne!(4, u64::get_hash(&0, &build_hasher)); 154 | assert_ne!(5, u64::get_hash(&0, &build_hasher)); 155 | 156 | assert_ne!(0, u64::get_hash(&1, &build_hasher)); 157 | assert_ne!(1, u64::get_hash(&1, &build_hasher)); 158 | assert_ne!(2, u64::get_hash(&1, &build_hasher)); 159 | assert_ne!(3, u64::get_hash(&1, &build_hasher)); 160 | assert_ne!(4, u64::get_hash(&1, &build_hasher)); 161 | assert_ne!(5, u64::get_hash(&1, &build_hasher)); 162 | 163 | let xored = u64::get_hash(&0, &build_hasher) ^ u64::get_hash(&1, &build_hasher); 164 | assert_ne!(0, xored); 165 | assert_ne!(1, xored); 166 | assert_ne!(2, xored); 167 | assert_ne!(3, xored); 168 | assert_ne!(4, xored); 169 | assert_ne!(5, xored); 170 | } 171 | 172 | #[test] 173 | pub fn test_ref_independent() { 174 | let build_hasher = RandomState::with_seeds(1, 2, 3, 4); 175 | assert_eq!(u8::get_hash(&&1, &build_hasher), u8::get_hash(&1, &build_hasher)); 176 | 
assert_eq!(u16::get_hash(&&2, &build_hasher), u16::get_hash(&2, &build_hasher)); 177 | assert_eq!(u32::get_hash(&&3, &build_hasher), u32::get_hash(&3, &build_hasher)); 178 | assert_eq!(u64::get_hash(&&4, &build_hasher), u64::get_hash(&4, &build_hasher)); 179 | assert_eq!(u128::get_hash(&&5, &build_hasher), u128::get_hash(&5, &build_hasher)); 180 | assert_eq!( 181 | str::get_hash(&"test", &build_hasher), 182 | str::get_hash("test", &build_hasher) 183 | ); 184 | assert_eq!( 185 | str::get_hash(&"test", &build_hasher), 186 | String::get_hash(&"test".to_string(), &build_hasher) 187 | ); 188 | #[cfg(specialize)] 189 | assert_eq!( 190 | str::get_hash(&"test", &build_hasher), 191 | <[u8]>::get_hash("test".as_bytes(), &build_hasher) 192 | ); 193 | 194 | let build_hasher = RandomState::with_seeds(10, 20, 30, 40); 195 | assert_eq!(u8::get_hash(&&&1, &build_hasher), u8::get_hash(&1, &build_hasher)); 196 | assert_eq!(u16::get_hash(&&&2, &build_hasher), u16::get_hash(&2, &build_hasher)); 197 | assert_eq!(u32::get_hash(&&&3, &build_hasher), u32::get_hash(&3, &build_hasher)); 198 | assert_eq!(u64::get_hash(&&&4, &build_hasher), u64::get_hash(&4, &build_hasher)); 199 | assert_eq!(u128::get_hash(&&&5, &build_hasher), u128::get_hash(&5, &build_hasher)); 200 | assert_eq!( 201 | str::get_hash(&&"test", &build_hasher), 202 | str::get_hash("test", &build_hasher) 203 | ); 204 | assert_eq!( 205 | str::get_hash(&&"test", &build_hasher), 206 | String::get_hash(&"test".to_string(), &build_hasher) 207 | ); 208 | #[cfg(specialize)] 209 | assert_eq!( 210 | str::get_hash(&&"test", &build_hasher), 211 | <[u8]>::get_hash(&"test".to_string().into_bytes(), &build_hasher) 212 | ); 213 | } 214 | } 215 | -------------------------------------------------------------------------------- /tests/bench.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(specialize, feature(build_hasher_simple_hash_one))] 2 | 3 | use ahash::{AHasher, RandomState}; 4 | use 
criterion::*; 5 | use fxhash::FxHasher; 6 | use rand::Rng; 7 | use std::collections::hash_map::DefaultHasher; 8 | use std::hash::{BuildHasherDefault, Hash, Hasher}; 9 | 10 | // Needs to be in sync with `src/lib.rs` 11 | const AHASH_IMPL: &str = if cfg!(any( 12 | all( 13 | any(target_arch = "x86", target_arch = "x86_64"), 14 | target_feature = "aes", 15 | not(miri), 16 | ), 17 | all(target_arch = "aarch64", target_feature = "aes", not(miri)), 18 | all( 19 | feature = "nightly-arm-aes", 20 | target_arch = "arm", 21 | target_feature = "aes", 22 | not(miri) 23 | ), 24 | )) { 25 | "aeshash" 26 | } else { 27 | "fallbackhash" 28 | }; 29 | 30 | fn ahash(b: &H) -> u64 { 31 | let build_hasher = RandomState::with_seeds(1, 2, 3, 4); 32 | build_hasher.hash_one(b) 33 | } 34 | 35 | fn fnvhash(b: &H) -> u64 { 36 | let mut hasher = fnv::FnvHasher::default(); 37 | b.hash(&mut hasher); 38 | hasher.finish() 39 | } 40 | 41 | fn siphash(b: &H) -> u64 { 42 | let mut hasher = DefaultHasher::default(); 43 | b.hash(&mut hasher); 44 | hasher.finish() 45 | } 46 | 47 | fn fxhash(b: &H) -> u64 { 48 | let mut hasher = FxHasher::default(); 49 | b.hash(&mut hasher); 50 | hasher.finish() 51 | } 52 | 53 | fn seahash(b: &H) -> u64 { 54 | let mut hasher = seahash::SeaHasher::default(); 55 | b.hash(&mut hasher); 56 | hasher.finish() 57 | } 58 | 59 | const STRING_LENGTHS: [u32; 12] = [1, 3, 4, 7, 8, 15, 16, 24, 33, 68, 132, 1024]; 60 | 61 | fn gen_strings() -> Vec { 62 | STRING_LENGTHS 63 | .iter() 64 | .map(|len| { 65 | let mut string = String::default(); 66 | for pos in 1..=*len { 67 | let c = (48 + (pos % 10) as u8) as char; 68 | string.push(c); 69 | } 70 | string 71 | }) 72 | .collect() 73 | } 74 | 75 | macro_rules! bench_inputs { 76 | ($group:ident, $hash:ident) => { 77 | // Number of iterations per batch should be high enough to hide timing overhead. 
78 | let size = BatchSize::NumIterations(50_000); 79 | 80 | let mut rng = rand::thread_rng(); 81 | $group.bench_function("u8", |b| b.iter_batched(|| rng.gen::(), |v| $hash(&v), size)); 82 | $group.bench_function("u16", |b| b.iter_batched(|| rng.gen::(), |v| $hash(&v), size)); 83 | $group.bench_function("u32", |b| b.iter_batched(|| rng.gen::(), |v| $hash(&v), size)); 84 | $group.bench_function("u64", |b| b.iter_batched(|| rng.gen::(), |v| $hash(&v), size)); 85 | $group.bench_function("u128", |b| b.iter_batched(|| rng.gen::(), |v| $hash(&v), size)); 86 | $group.bench_with_input("strings", &gen_strings(), |b, s| b.iter(|| $hash(black_box(s)))); 87 | }; 88 | } 89 | 90 | fn bench_ahash(c: &mut Criterion) { 91 | let mut group = c.benchmark_group(AHASH_IMPL); 92 | bench_inputs!(group, ahash); 93 | } 94 | 95 | fn bench_fx(c: &mut Criterion) { 96 | let mut group = c.benchmark_group("fx"); 97 | bench_inputs!(group, fxhash); 98 | } 99 | 100 | fn bench_fnv(c: &mut Criterion) { 101 | let mut group = c.benchmark_group("fnv"); 102 | bench_inputs!(group, fnvhash); 103 | } 104 | 105 | fn bench_sea(c: &mut Criterion) { 106 | let mut group = c.benchmark_group("sea"); 107 | bench_inputs!(group, seahash); 108 | } 109 | 110 | fn bench_sip(c: &mut Criterion) { 111 | let mut group = c.benchmark_group("sip"); 112 | bench_inputs!(group, siphash); 113 | } 114 | 115 | fn bench_map(c: &mut Criterion) { 116 | #[cfg(feature = "std")] 117 | { 118 | let mut group = c.benchmark_group("map"); 119 | group.bench_function("aHash-alias", |b| { 120 | b.iter(|| { 121 | let hm: ahash::HashMap = (0..1_000_000).map(|i| (i, i)).collect(); 122 | let mut sum = 0; 123 | for i in 0..1_000_000 { 124 | if let Some(x) = hm.get(&i) { 125 | sum += x; 126 | } 127 | } 128 | }) 129 | }); 130 | group.bench_function("aHash-hashBrown", |b| { 131 | b.iter(|| { 132 | let hm: hashbrown::HashMap = (0..1_000_000).map(|i| (i, i)).collect(); 133 | let mut sum = 0; 134 | for i in 0..1_000_000 { 135 | if let Some(x) = hm.get(&i) { 
136 | sum += x; 137 | } 138 | } 139 | }) 140 | }); 141 | group.bench_function("aHash-hashBrown-explicit", |b| { 142 | b.iter(|| { 143 | let hm: hashbrown::HashMap = (0..1_000_000).map(|i| (i, i)).collect(); 144 | let mut sum = 0; 145 | for i in 0..1_000_000 { 146 | if let Some(x) = hm.get(&i) { 147 | sum += x; 148 | } 149 | } 150 | }) 151 | }); 152 | group.bench_function("aHash-wrapper", |b| { 153 | b.iter(|| { 154 | let hm: ahash::AHashMap = (0..1_000_000).map(|i| (i, i)).collect(); 155 | let mut sum = 0; 156 | for i in 0..1_000_000 { 157 | if let Some(x) = hm.get(&i) { 158 | sum += x; 159 | } 160 | } 161 | }) 162 | }); 163 | group.bench_function("aHash-rand", |b| { 164 | b.iter(|| { 165 | let hm: std::collections::HashMap = (0..1_000_000).map(|i| (i, i)).collect(); 166 | let mut sum = 0; 167 | for i in 0..1_000_000 { 168 | if let Some(x) = hm.get(&i) { 169 | sum += x; 170 | } 171 | } 172 | }) 173 | }); 174 | group.bench_function("aHash-default", |b| { 175 | b.iter(|| { 176 | let hm: std::collections::HashMap> = 177 | (0..1_000_000).map(|i| (i, i)).collect(); 178 | let mut sum = 0; 179 | for i in 0..1_000_000 { 180 | if let Some(x) = hm.get(&i) { 181 | sum += x; 182 | } 183 | } 184 | }) 185 | }); 186 | } 187 | } 188 | 189 | criterion_main!(benches); 190 | 191 | criterion_group!( 192 | benches, 193 | bench_ahash, 194 | bench_fx, 195 | bench_fnv, 196 | bench_sea, 197 | bench_sip, 198 | bench_map 199 | ); 200 | -------------------------------------------------------------------------------- /tests/map_tests.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(specialize, feature(build_hasher_simple_hash_one))] 2 | 3 | use std::hash::{BuildHasher, Hash, Hasher}; 4 | 5 | use ahash::RandomState; 6 | use criterion::*; 7 | use fxhash::FxHasher; 8 | 9 | fn gen_word_pairs() -> Vec { 10 | let words: Vec<_> = r#" 11 | a, ability, able, about, above, accept, according, account, across, act, action, 12 | activity, actually, add, 
address, administration, admit, adult, affect, after, 13 | again, against, age, agency, agent, ago, agree, agreement, ahead, air, all, 14 | allow, almost, alone, along, already, also, although, always, American, among, 15 | amount, analysis, and, animal, another, answer, any, anyone, anything, appear, 16 | apply, approach, area, argue, arm, around, arrive, art, article, artist, as, 17 | ask, assume, at, attack, attention, attorney, audience, author, authority, 18 | available, avoid, away, baby, back, bad, bag, ball, bank, bar, base, be, beat, 19 | beautiful, because, become, bed, before, begin, behavior, behind, believe, 20 | benefit, best, better, between, beyond, big, bill, billion, bit, black, blood, 21 | blue, board, body, book, born, both, box, boy, break, bring, brother, budget, 22 | build, building, business, but, buy, by, call, camera, campaign, can, cancer, 23 | candidate, capital, car, card, care, career, carry, case, catch, cause, cell, 24 | center, central, century, certain, certainly, chair, challenge, chance, change, 25 | character, charge, check, child, choice, choose, church, citizen, city, civil, 26 | claim, class, clear, clearly, close, coach, cold, collection, college, color, 27 | come, commercial, common, community, company, compare, computer, concern, 28 | condition, conference, Congress, consider, consumer, contain, continue, control, 29 | cost, could, country, couple, course, court, cover, create, crime, cultural, 30 | culture, cup, current, customer, cut, dark, data, daughter, day, dead, deal, 31 | death, debate, decade, decide, decision, deep, defense, degree, Democrat, 32 | democratic, describe, design, despite, detail, determine, develop, development, 33 | die, difference, different, difficult, dinner, direction, director, discover, 34 | discuss, discussion, disease, do, doctor, dog, door, down, draw, dream, drive, 35 | drop, drug, during, each, early, east, easy, eat, economic, economy, edge, 36 | education, effect, effort, eight, 
either, election, else, employee, end, energy, 37 | enjoy, enough, enter, entire, environment, environmental, especially, establish, 38 | even, evening, event, ever, every, everybody, everyone, everything, evidence, 39 | exactly, example, executive, exist, expect, experience, expert, explain, eye, 40 | face, fact, factor, fail, fall, family, far, fast, father, fear, federal, feel, 41 | feeling, few, field, fight, figure, fill, film, final, finally, financial, find, 42 | fine, finger, finish, fire, firm, first, fish, five, floor, fly, focus, follow, 43 | food, foot, for, force, foreign, forget, form, former, forward, four, free, 44 | friend, from, front, full, fund, future, game, garden, gas, general, generation, 45 | get, girl, give, glass, go, goal, good, government, great, green, ground, group, 46 | grow, growth, guess, gun, guy, hair, half, hand, hang, happen, happy, hard, 47 | have, he, head, health, hear, heart, heat, heavy, help, her, here, herself, 48 | high, him, himself, his, history, hit, hold, home, hope, hospital, hot, hotel, 49 | hour, house, how, however, huge, human, hundred, husband, I, idea, identify, if, 50 | image, imagine, impact, important, improve, in, include, including, increase, 51 | indeed, indicate, individual, industry, information, inside, instead, 52 | institution, interest, interesting, international, interview, into, investment, 53 | involve, issue, it, item, its, itself, job, join, just, keep, key, kid, kill, 54 | kind, kitchen, know, knowledge, land, language, large, last, late, later, laugh, 55 | law, lawyer, lay, lead, leader, learn, least, leave, left, leg, legal, less, 56 | let, letter, level, lie, life, light, like, likely, line, list, listen, little, 57 | live, local, long, look, lose, loss, lot, love, low, machine, magazine, main, 58 | maintain, major, majority, make, man, manage, management, manager, many, market, 59 | marriage, material, matter, may, maybe, me, mean, measure, media, medical, meet, 60 | meeting, member, 
memory, mention, message, method, middle, might, military, 61 | million, mind, minute, miss, mission, model, modern, moment, money, month, more, 62 | morning, most, mother, mouth, move, movement, movie, Mr, Mrs, much, music, must, 63 | my, myself, name, nation, national, natural, nature, near, nearly, necessary, 64 | need, network, never, new, news, newspaper, next, nice, night, no, none, nor, 65 | north, not, note, nothing, notice, now, n't, number, occur, of, off, offer, 66 | office, officer, official, often, oh, oil, ok, old, on, once, one, only, onto, 67 | open, operation, opportunity, option, or, order, organization, other, others, 68 | our, out, outside, over, own, owner, page, pain, painting, paper, parent, part, 69 | participant, particular, particularly, partner, party, pass, past, patient, 70 | pattern, pay, peace, people, per, perform, performance, perhaps, period, person, 71 | personal, phone, physical, pick, picture, piece, place, plan, plant, play, 72 | player, PM, point, police, policy, political, politics, poor, popular, 73 | population, position, positive, possible, power, practice, prepare, present, 74 | president, pressure, pretty, prevent, price, private, probably, problem, 75 | process, produce, product, production, professional, professor, program, 76 | project, property, protect, prove, provide, public, pull, purpose, push, put, 77 | quality, question, quickly, quite, race, radio, raise, range, rate, rather, 78 | reach, read, ready, real, reality, realize, really, reason, receive, recent, 79 | recently, recognize, record, red, reduce, reflect, region, relate, relationship, 80 | religious, remain, remember, remove, report, represent, Republican, require, 81 | research, resource, respond, response, responsibility, rest, result, return, 82 | reveal, rich, right, rise, risk, road, rock, role, room, rule, run, safe, same, 83 | save, say, scene, school, science, scientist, score, sea, season, seat, second, 84 | section, security, see, seek, seem, 
sell, send, senior, sense, series, serious, 85 | serve, service, set, seven, several, sex, sexual, shake, share, she, shoot, 86 | short, shot, should, shoulder, show, side, sign, significant, similar, simple, 87 | simply, since, sing, single, sister, sit, site, situation, six, size, skill, 88 | skin, small, smile, so, social, society, soldier, some, somebody, someone, 89 | something, sometimes, son, song, soon, sort, sound, source, south, southern, 90 | space, speak, special, specific, speech, spend, sport, spring, staff, stage, 91 | stand, standard, star, start, state, statement, station, stay, step, still, 92 | stock, stop, store, story, strategy, street, strong, structure, student, study, 93 | stuff, style, subject, success, successful, such, suddenly, suffer, suggest, 94 | summer, support, sure, surface, system, table, take, talk, task, tax, teach, 95 | teacher, team, technology, television, tell, ten, tend, term, test, than, thank, 96 | that, the, their, them, themselves, then, theory, there, these, they, thing, 97 | think, third, this, those, though, thought, thousand, threat, three, through, 98 | throughout, throw, thus, time, to, today, together, tonight, too, top, total, 99 | tough, toward, town, trade, traditional, training, travel, treat, treatment, 100 | tree, trial, trip, trouble, true, truth, try, turn, TV, two, type, under, 101 | understand, unit, until, up, upon, us, use, usually, value, various, very, 102 | victim, view, violence, visit, voice, vote, wait, walk, wall, want, war, watch, 103 | water, way, we, weapon, wear, week, weight, well, west, western, what, whatever, 104 | when, where, whether, which, while, white, who, whole, whom, whose, why, wide, 105 | wife, will, win, wind, window, wish, with, within, without, woman, wonder, word, 106 | work, worker, world, worry, would, write, writer, wrong, yard, yeah, year, yes, 107 | yet, you, young, your, yourself"# 108 | .split(',') 109 | .map(|word| word.trim()) 110 | .collect(); 111 | 112 | let mut 
word_pairs: Vec<_> = Vec::new(); 113 | for word in &words { 114 | for other_word in &words { 115 | word_pairs.push(word.to_string() + " " + other_word); 116 | } 117 | } 118 | assert_eq!(1_000_000, word_pairs.len()); 119 | word_pairs 120 | } 121 | 122 | #[allow(unused)] // False positive 123 | fn test_hash_common_words(build_hasher: &B) { 124 | let word_pairs: Vec<_> = gen_word_pairs(); 125 | check_for_collisions(build_hasher, &word_pairs, 32); 126 | } 127 | 128 | #[allow(unused)] // False positive 129 | fn check_for_collisions(build_hasher: &B, items: &[H], bucket_count: usize) { 130 | let mut buckets = vec![0; bucket_count]; 131 | for item in items { 132 | let value = hash(item, build_hasher) as usize; 133 | buckets[value % bucket_count] += 1; 134 | } 135 | let mean = items.len() / bucket_count; 136 | let max = *buckets.iter().max().unwrap(); 137 | let min = *buckets.iter().min().unwrap(); 138 | assert!( 139 | (min as f64) > (mean as f64) * 0.95, 140 | "min: {}, max:{}, {:?}", 141 | min, 142 | max, 143 | buckets 144 | ); 145 | assert!( 146 | (max as f64) < (mean as f64) * 1.05, 147 | "min: {}, max:{}, {:?}", 148 | min, 149 | max, 150 | buckets 151 | ); 152 | } 153 | 154 | #[cfg(specialize)] 155 | #[allow(unused)] // False positive 156 | fn hash(b: &H, build_hasher: &B) -> u64 { 157 | build_hasher.hash_one(b) 158 | } 159 | 160 | #[cfg(not(specialize))] 161 | #[allow(unused)] // False positive 162 | fn hash(b: &H, build_hasher: &B) -> u64 { 163 | let mut hasher = build_hasher.build_hasher(); 164 | b.hash(&mut hasher); 165 | hasher.finish() 166 | } 167 | 168 | #[test] 169 | fn test_bucket_distribution() { 170 | let build_hasher = RandomState::with_seeds(1, 2, 3, 4); 171 | test_hash_common_words(&build_hasher); 172 | let sequence: Vec<_> = (0..320000).collect(); 173 | check_for_collisions(&build_hasher, &sequence, 32); 174 | let sequence: Vec<_> = (0..2560000).collect(); 175 | check_for_collisions(&build_hasher, &sequence, 256); 176 | let sequence: Vec<_> = 
(0..320000).map(|i| i * 1024).collect(); 177 | check_for_collisions(&build_hasher, &sequence, 32); 178 | let sequence: Vec<_> = (0..2560000_u64).map(|i| i * 1024).collect(); 179 | check_for_collisions(&build_hasher, &sequence, 256); 180 | } 181 | 182 | #[cfg(feature = "std")] 183 | #[test] 184 | fn test_ahash_alias_map_construction() { 185 | let mut map = ahash::HashMap::default(); 186 | map.insert(1, "test"); 187 | use ahash::HashMapExt; 188 | let mut map = ahash::HashMap::with_capacity(1234); 189 | map.insert(1, "test"); 190 | } 191 | 192 | #[cfg(feature = "std")] 193 | #[test] 194 | fn test_ahash_alias_set_construction() { 195 | let mut set = ahash::HashSet::default(); 196 | set.insert(1); 197 | 198 | use ahash::HashSetExt; 199 | let mut set = ahash::HashSet::with_capacity(1235); 200 | set.insert(1); 201 | } 202 | 203 | 204 | #[cfg(feature = "std")] 205 | #[test] 206 | fn test_key_ref() { 207 | let mut map = ahash::HashMap::default(); 208 | map.insert(1, "test"); 209 | assert_eq!(Some((1, "test")), map.remove_entry(&1)); 210 | 211 | let mut map = ahash::HashMap::default(); 212 | map.insert(&1, "test"); 213 | assert_eq!(Some((&1, "test")), map.remove_entry(&&1)); 214 | 215 | let mut m = ahash::HashSet::>::default(); 216 | m.insert(Box::from("hello".to_string())); 217 | assert!(m.contains(&"hello".to_string())); 218 | 219 | let mut m = ahash::HashSet::::default(); 220 | m.insert("hello".to_string()); 221 | assert!(m.contains("hello")); 222 | 223 | let mut m = ahash::HashSet::>::default(); 224 | m.insert(Box::from(&b"hello"[..])); 225 | assert!(m.contains(&b"hello"[..])); 226 | } 227 | 228 | #[cfg(feature = "std")] 229 | #[test] 230 | fn test_byte_dist() { 231 | use rand::{SeedableRng, Rng, RngCore}; 232 | use pcg_mwc::Mwc256XXA64; 233 | 234 | let mut r = Mwc256XXA64::seed_from_u64(0xe786_c22b_119c_1479); 235 | let mut lowest = 2.541; 236 | let mut highest = 2.541; 237 | for _round in 0..100 { 238 | let mut table: [bool; 256 * 8] = [false; 256 * 8]; 239 | let 
hasher = RandomState::with_seeds(r.gen(), r.gen(), r.gen(), r.gen()); 240 | for i in 0..128 { 241 | let mut keys: [u8; 8] = hasher.hash_one((i as u64) << 30).to_ne_bytes(); 242 | //let mut keys = r.next_u64().to_ne_bytes(); //This is a control to test assert sensitivity. 243 | for idx in 0..8 { 244 | while table[idx * 256 + keys[idx] as usize] { 245 | keys[idx] = keys[idx].wrapping_add(1); 246 | } 247 | table[idx * 256 + keys[idx] as usize] = true; 248 | } 249 | } 250 | 251 | for idx in 0..8 { 252 | let mut len = 0; 253 | let mut total_len = 0; 254 | let mut num_seq = 0; 255 | for i in 0..256 { 256 | if table[idx * 256 + i] { 257 | len += 1; 258 | } else if len != 0 { 259 | num_seq += 1; 260 | total_len += len; 261 | len = 0; 262 | } 263 | } 264 | let mean = total_len as f32 / num_seq as f32; 265 | println!("Mean sequence length = {}", mean); 266 | if mean > highest { 267 | highest = mean; 268 | } 269 | if mean < lowest { 270 | lowest = mean; 271 | } 272 | } 273 | } 274 | assert!(lowest > 1.9, "Lowest = {}", lowest); 275 | assert!(highest < 3.9, "Highest = {}", highest); 276 | } 277 | 278 | 279 | fn ahash_vec(b: &Vec) -> u64 { 280 | let mut total: u64 = 0; 281 | for item in b { 282 | let mut hasher = RandomState::with_seeds(12, 34, 56, 78).build_hasher(); 283 | item.hash(&mut hasher); 284 | total = total.wrapping_add(hasher.finish()); 285 | } 286 | total 287 | } 288 | 289 | fn fxhash_vec(b: &Vec) -> u64 { 290 | let mut total: u64 = 0; 291 | for item in b { 292 | let mut hasher = FxHasher::default(); 293 | item.hash(&mut hasher); 294 | total = total.wrapping_add(hasher.finish()); 295 | } 296 | total 297 | } 298 | 299 | fn bench_ahash_words(c: &mut Criterion) { 300 | let words = gen_word_pairs(); 301 | c.bench_function("aes_words", |b| b.iter(|| black_box(ahash_vec(&words)))); 302 | } 303 | 304 | fn bench_fx_words(c: &mut Criterion) { 305 | let words = gen_word_pairs(); 306 | c.bench_function("fx_words", |b| b.iter(|| black_box(fxhash_vec(&words)))); 307 | } 308 | 
309 | criterion_main!(benches);
310 | criterion_group!(benches, bench_ahash_words, bench_fx_words,);
311 |
--------------------------------------------------------------------------------
/tests/nopanic.rs:
--------------------------------------------------------------------------------
1 | //! Checks that hashing through aHash does not contain a reachable panic path.
2 | //!
3 | //! Every function marked `#[no_panic]` is checked by the `no_panic` crate when the
4 | //! test binary is built; `test_no_panic` only has to call them so they get linked in.
5 |
6 | use ahash::{AHasher, RandomState};
7 | use std::hash::{BuildHasher, Hash, Hasher};
8 |
9 | #[macro_use]
10 | extern crate no_panic;
11 |
12 | /// Hashes `num` and `string` by writing them directly into two differently seeded
13 | /// hashers (`write_i32` / `write`) and returns both finished values.
14 | #[inline(never)]
15 | #[no_panic]
16 | fn hash_test_final(num: i32, string: &str) -> (u64, u64) {
17 |     // `Hasher` is already in scope from the file-level import.
18 |     let mut hasher1 = RandomState::with_seeds(1, 2, 3, 4).build_hasher();
19 |     let mut hasher2 = RandomState::with_seeds(3, 4, 5, 6).build_hasher();
20 |     hasher1.write_i32(num);
21 |     hasher2.write(string.as_bytes());
22 |     (hasher1.finish(), hasher2.finish())
23 | }
24 |
25 | /// Non-inlined caller of `hash_test_final`; the result is discarded.
26 | #[inline(never)]
27 | fn hash_test_final_wrapper(num: i32, string: &str) {
28 |     hash_test_final(num, string);
29 | }
30 |
31 | /// A `BuildHasher` that hands out clones of a single pre-built `AHasher`.
32 | struct SimpleBuildHasher {
33 |     hasher: AHasher,
34 | }
35 |
36 | impl SimpleBuildHasher {
37 |     /// Hashes `x` via its `Hash` impl using a fresh clone of the stored hasher.
38 |     fn hash_one<T: Hash>(&self, x: T) -> u64
39 |     where
40 |         Self: Sized,
41 |     {
42 |         let mut hasher = self.build_hasher();
43 |         x.hash(&mut hasher);
44 |         hasher.finish()
45 |     }
46 | }
47 |
48 | impl BuildHasher for SimpleBuildHasher {
49 |     type Hasher = AHasher;
50 |
51 |     /// Returns a clone of the wrapped hasher, so every call starts from the same state.
52 |     fn build_hasher(&self) -> Self::Hasher {
53 |         self.hasher.clone()
54 |     }
55 | }
56 |
57 | /// Hashes `num` and `string.as_bytes()` through `SimpleBuildHasher::hash_one`,
58 | /// i.e. through the values' `Hash` impls rather than raw `write_*` calls.
59 | #[inline(never)]
60 | #[no_panic]
61 | fn hash_test_specialize(num: i32, string: &str) -> (u64, u64) {
62 |     let hasher1 = RandomState::with_seeds(1, 2, 3, 4).build_hasher();
63 |     let hasher2 = RandomState::with_seeds(1, 2, 3, 4).build_hasher();
64 |     (
65 |         SimpleBuildHasher { hasher: hasher1 }.hash_one(num),
66 |         SimpleBuildHasher { hasher: hasher2 }.hash_one(string.as_bytes()),
67 |     )
68 | }
69 |
70 | /// Non-inlined caller of `hash_test_specialize`; the result is discarded.
71 | #[inline(never)]
72 | fn hash_test_specialize_wrapper(num: i32, string: &str) {
73 |     hash_test_specialize(num, string);
74 | }
75 |
76 | /// Hashes `num` and `string.as_bytes()` by calling `hash_one` on `RandomState` itself.
77 | #[inline(never)]
78 | #[no_panic]
79 | fn hash_test_random(num: i32, string: &str) -> (u64, u64) {
80 |     let build_hasher1 = RandomState::with_seeds(1, 2, 3, 4);
81 |     let build_hasher2 = RandomState::with_seeds(1, 2, 3, 4);
82 |     (build_hasher1.hash_one(&num), build_hasher2.hash_one(string.as_bytes()))
83 | }
84 |
85 | /// Non-inlined caller of `hash_test_random`; the result is discarded.
86 | ///
87 | /// This previously called `hash_test_specialize`, which made it a duplicate of
88 | /// `hash_test_specialize_wrapper` and left `hash_test_random` without a wrapper.
89 | #[inline(never)]
90 | fn hash_test_random_wrapper(num: i32, string: &str) {
91 |     hash_test_random(num, string);
92 | }
93 |
94 | /// Calls every hashing path above so the `#[no_panic]` functions are built and linked.
95 | #[test]
96 | fn test_no_panic() {
97 |     hash_test_final_wrapper(2, "Foo");
98 |     hash_test_specialize_wrapper(2, "Bar");
99 |     hash_test_random(2, "Baz");
100 |     hash_test_random_wrapper(2, "Bat");
101 | }
102 |
--------------------------------------------------------------------------------