├── sha
├── sha256_2.mojo
├── __init__.mojo
└── sha256.mojo
├── md5
├── __init__.mojo
└── md5.mojo
├── o1hash
├── __init__.mojo
└── o1hash.mojo
├── wyhasher
├── __init__.mojo
└── wyhasher.mojo
├── ahasher
├── __init__.mojo
└── ahasher.mojo
├── fnv1a
├── __init__.mojo
└── fnv1a.mojo
├── fxhash
├── __init__.mojo
└── fxhash.mojo
├── benchmark_other_languages
├── js
│ └── hash_functions
│ │ ├── .gitignore
│ │ ├── README.md
│ │ ├── package.json
│ │ └── package-lock.json
├── go
│ └── hash_functions
│ │ └── go.mod
├── c
│ └── hash_functions
│ │ ├── benchmark
│ │ ├── Makefile
│ │ ├── xxhash.c
│ │ ├── xxh3.h
│ │ ├── benchmark_wyhash.cpp
│ │ └── wyhash.h
├── swift
│ └── hash_functions
│ │ ├── .gitignore
│ │ ├── README.md
│ │ ├── .swiftpm
│ │ └── xcode
│ │ │ ├── package.xcworkspace
│ │ │ └── xcshareddata
│ │ │ │ └── IDEWorkspaceChecks.plist
│ │ │ └── xcshareddata
│ │ │ └── xcschemes
│ │ │ └── hash_functions.xcscheme
│ │ └── Package.swift
├── rust
│ └── hash_functions
│ │ ├── Cargo.toml
│ │ └── Cargo.lock
└── python
│ └── hash_functions
│ ├── md5_benchmark.py
│ ├── benchmark_dict.py
│ └── benchmark.py
├── images
├── hash_map.png
└── hash_functions.png
├── HashFunctionsAndWhereToFindThem.pdf
├── LICENSE
├── checkout_remote_modules.sh
├── test_sha256.mojo
├── test_md5.mojo
├── benchmark_hash_words_file.mojo
├── .gitignore
├── benchmark_hash_functions.mojo
├── test_sha256_2.mojo
├── README.md
└── my_utils
└── __init__.mojo
/sha/sha256_2.mojo:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/md5/__init__.mojo:
--------------------------------------------------------------------------------
1 | from .md5 import md5_string
--------------------------------------------------------------------------------
/o1hash/__init__.mojo:
--------------------------------------------------------------------------------
1 | from .o1hash import o1_hash
--------------------------------------------------------------------------------
/sha/__init__.mojo:
--------------------------------------------------------------------------------
1 | from .sha256 import sha256_encode
--------------------------------------------------------------------------------
/wyhasher/__init__.mojo:
--------------------------------------------------------------------------------
1 | from .wyhasher import wyhash
--------------------------------------------------------------------------------
/ahasher/__init__.mojo:
--------------------------------------------------------------------------------
1 | from .ahasher import ahash, AHasher
--------------------------------------------------------------------------------
/fnv1a/__init__.mojo:
--------------------------------------------------------------------------------
1 | from .fnv1a import fnv1a32, fnv1a64
--------------------------------------------------------------------------------
/fxhash/__init__.mojo:
--------------------------------------------------------------------------------
1 | from .fxhash import fxhash32, fxhash64
2 |
--------------------------------------------------------------------------------
/benchmark_other_languages/js/hash_functions/.gitignore:
--------------------------------------------------------------------------------
1 | /node_modules
--------------------------------------------------------------------------------
/images/hash_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mzaks/mojo-hash/HEAD/images/hash_map.png
--------------------------------------------------------------------------------
/benchmark_other_languages/go/hash_functions/go.mod:
--------------------------------------------------------------------------------
1 | module hash_functions
2 |
3 | go 1.21.5
4 |
--------------------------------------------------------------------------------
/images/hash_functions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mzaks/mojo-hash/HEAD/images/hash_functions.png
--------------------------------------------------------------------------------
/HashFunctionsAndWhereToFindThem.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mzaks/mojo-hash/HEAD/HashFunctionsAndWhereToFindThem.pdf
--------------------------------------------------------------------------------
/benchmark_other_languages/js/hash_functions/README.md:
--------------------------------------------------------------------------------
1 | First execute `npm install` to fetch the dependencies and then execute `node benchmark.js`.
2 |
--------------------------------------------------------------------------------
/benchmark_other_languages/c/hash_functions/benchmark:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mzaks/mojo-hash/HEAD/benchmark_other_languages/c/hash_functions/benchmark
--------------------------------------------------------------------------------
/benchmark_other_languages/js/hash_functions/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "dependencies": {
3 | "wyhash": "1.0.0",
4 | "xxhashjs": "0.2.2"
5 | }
6 | }
7 |
--------------------------------------------------------------------------------
/benchmark_other_languages/swift/hash_functions/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | /.build
3 | /Packages
4 | /*.xcodeproj
5 | xcuserdata/
6 | DerivedData/
7 | .swiftpm/config/registries.json
8 | .swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata
9 | .netrc
10 |
--------------------------------------------------------------------------------
/benchmark_other_languages/swift/hash_functions/README.md:
--------------------------------------------------------------------------------
1 | # hash_functions
2 |
3 | Benchmark for the default hash function.
4 |
5 | Installing Swift on Ubuntu: https://gist.github.com/Jswizzy/408af5829970f9eb18f9b45f891910bb (pick the latest version; tested with 5.9.2)
6 |
7 | Run the benchmark with `swift run --configuration release`
8 |
--------------------------------------------------------------------------------
/benchmark_other_languages/swift/hash_functions/.swiftpm/xcode/package.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | IDEDidComputeMac32BitWarning
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/benchmark_other_languages/rust/hash_functions/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "hash_functions"
3 | version = "0.1.0"
4 | edition = "2021"
5 |
6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
7 |
8 | [dependencies]
9 | rand = "0.8.5"
10 | fxhash = "0.2.1"
11 | ahash = "0.8.6"
12 | wyhash2 = "0.2.1"
13 | md5 = "0.7.0"
14 |
15 | [profile.dev]
16 | opt-level = 3
--------------------------------------------------------------------------------
/benchmark_other_languages/python/hash_functions/md5_benchmark.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | import time
3 |
4 | if __name__ == "__main__":
5 | file = open("/usr/share/dict/words", "r")
6 | content = file.read().encode()
7 | tik = time.time_ns()
8 | result = hashlib.md5(content)
9 | tok = time.time_ns()
10 | print(result.hexdigest())
11 | print(f"In: {tok - tik}")
12 |
13 | tik = time.time_ns()
14 | result = hashlib.sha256(content)
15 | tok = time.time_ns()
16 | print(result.hexdigest())
17 | print(f"In: {tok - tik}")
18 |
19 |
--------------------------------------------------------------------------------
/benchmark_other_languages/swift/hash_functions/Package.swift:
--------------------------------------------------------------------------------
1 | // swift-tools-version: 5.8
2 | // The swift-tools-version declares the minimum version of Swift required to build this package.
3 |
4 | import PackageDescription
5 |
6 | let package = Package(
7 | name: "hash_functions",
8 | products: [
9 | .executable(
10 | name: "hash_functions",
11 | targets: ["hash_functions"]),
12 | ],
13 | dependencies: [],
14 | targets: [
15 | .executableTarget(
16 | name: "hash_functions",
17 | dependencies: [])
18 | ]
19 | )
20 |
--------------------------------------------------------------------------------
/o1hash/o1hash.mojo:
--------------------------------------------------------------------------------
1 | fn o1_hash(s: String) -> UInt64:
2 |     """Hash `s` in constant time by sampling only a few of its bytes.
3 |
4 |     Strings of 4 or more bytes combine the first, middle and last 32-bit
5 |     words; strings of 1 to 3 bytes combine the first, middle and last byte;
6 |     the empty string hashes to 0.
7 |
8 |     Args:
9 |         s: The string whose bytes are sampled.
10 |
11 |     Returns:
12 |         The 64-bit hash value.
13 |     """
14 |     var p = s.unsafe_ptr()
15 |     var bytes = s.byte_length()
16 |     if bytes >= 4:
17 |         var first = p.bitcast[DType.uint32]()[0]
18 |         var middle = p.offset((bytes >> 1) - 2).bitcast[DType.uint32]()[0]
19 |         var last = p.offset(bytes - 4).bitcast[DType.uint32]()[0]
20 |         # The sum wraps at 32 bits, but the product has to be taken in 64 bits:
21 |         # multiplying as uint32 and widening afterwards leaves the upper half
22 |         # of the hash permanently zero.
23 |         return (first + last).cast[DType.uint64]() * middle.cast[DType.uint64]()
24 |     if bytes:
25 |         # Parenthesised so the expression can legally span several lines.
26 |         var tail = (
27 |             (p[0].cast[DType.uint64]() << 16)
28 |             | (p[bytes >> 1].cast[DType.uint64]() << 8)
29 |             | p[bytes - 1].cast[DType.uint64]()
30 |         )
31 |         # Full 16-digit multiplier; the previous literal had lost its last
32 |         # hex digit (NOTE(review): value taken from the reference o1hash).
33 |         return tail * 0xa0761d6478bd642f
34 |     return 0
35 |
--------------------------------------------------------------------------------
/fnv1a/fnv1a.mojo:
--------------------------------------------------------------------------------
1 | alias fnv_32_prime: UInt32 = 0x01000193
2 | alias fnv_32_offset_bassis: UInt32 = 0x811c9dc5
3 | alias fnv_64_prime = 0x100000001b3
4 | alias fnv_64_offset_bassis = 0xcbf29ce484222325
5 |
6 |
7 | @always_inline
8 | fn fnv1a32(s: String) -> UInt32:  # 32-bit FNV-1a over the bytes of `s`
9 |     var data = UnsafePointer(s.unsafe_ptr())
10 |     var acc = fnv_32_offset_bassis
11 |     for idx in range(len(s)):
12 |         # One FNV-1a step: xor the next byte in, then multiply by the prime.
13 |         acc = (acc ^ data.load(idx).cast[DType.uint32]()) * fnv_32_prime
14 |     return acc
15 |
16 | @always_inline
17 | fn fnv1a64(s: String) -> UInt64:  # 64-bit FNV-1a over the bytes of `s`
18 |     var data = UnsafePointer(s.unsafe_ptr())
19 |     var acc: UInt64 = fnv_64_offset_bassis
20 |     for idx in range(len(s)):
21 |         # One FNV-1a step: xor the next byte in, then multiply by the prime.
22 |         acc = (acc ^ data.load(idx).cast[DType.uint64]()) * fnv_64_prime
23 |     return acc
--------------------------------------------------------------------------------
/benchmark_other_languages/c/hash_functions/Makefile:
--------------------------------------------------------------------------------
1 | # wyhash bench makefile
2 |
3 | CXX = g++
4 | CXXFLAGS = -std=c++11 -O2 -s -Wall -march=native
5 |
6 | # Benchmark driver shipped in this directory (there is no benchmark.cpp here).
7 | SRC = benchmark_wyhash.cpp
8 |
9 | TARGETS = wyhash0 wyhash1 wyhash2 xxh3scalar xxh3sse2 xxh3avx2
10 |
11 | # Neither name is a real file; do not let a stray file of that name block the rule.
12 | .PHONY: all clean
13 |
14 | all: $(TARGETS)
15 |
16 | wyhash0: $(SRC) wyhash.h
17 | 	$(CXX) $(SRC) -o wyhash0 $(CXXFLAGS) -DWYHASH_CONDOM=0
18 |
19 | wyhash1: $(SRC) wyhash.h
20 | 	$(CXX) $(SRC) -o wyhash1 $(CXXFLAGS) -DWYHASH_CONDOM=1
21 |
22 | wyhash2: $(SRC) wyhash.h
23 | 	$(CXX) $(SRC) -o wyhash2 $(CXXFLAGS) -DWYHASH_CONDOM=2
24 |
25 | xxh3scalar: $(SRC)
26 | 	$(CXX) $(SRC) -o xxh3scalar $(CXXFLAGS) -DXXH_VECTOR=0 -DXXH3
27 |
28 | xxh3sse2: $(SRC)
29 | 	$(CXX) $(SRC) -o xxh3sse2 $(CXXFLAGS) -DXXH_VECTOR=1 -DXXH3
30 |
31 | xxh3avx2: $(SRC)
32 | 	$(CXX) $(SRC) -o xxh3avx2 $(CXXFLAGS) -DXXH_VECTOR=2 -DXXH3
33 |
34 | # -f keeps clean from failing when some targets were never built.
35 | clean:
36 | 	rm -f $(TARGETS)
37 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Maxim Zaks
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/checkout_remote_modules.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | function check_out_remote_module() (
4 |     # Usage: check_out_remote_module <repo-url> <path>[=<module-name>]...
5 |     rurl="$1"; shift
6 |     declare -a paths
7 |     declare -a module_names
8 |     for var in "$@"
9 |     do
10 |         # Split "path=name" on "="; the IFS override applies to this read only.
11 |         IFS="=" read -ra module_name_components <<< "$var"
12 |         components_count=${#module_name_components[@]}
13 |         path=${module_name_components[0]}
14 |         module_name=${module_name_components[$components_count-1]}
15 |         paths=("${paths[@]}" "$path")
16 |         module_names=("${module_names[@]}" "$module_name")
17 |     done
18 |
19 |     # Drop stale copies; ":?" aborts instead of expanding to "rm -rf ../".
20 |     for module_name in "${module_names[@]}"
21 |     do
22 |         rm -rf "../${module_name:?}"
23 |     done
24 |
25 |     current_date_time=$(date)
26 |     echo "URL: $rurl"
27 |     git clone -n --depth=1 --filter=tree:0 "$rurl" || exit 1
28 |     cd "${rurl##*/}" || exit 1  # never run the checkout outside the clone
29 |     git sparse-checkout set --no-cone "${paths[@]}"
30 |     git checkout
31 |
32 |     for i in "${!paths[@]}"
33 |     do
34 |         module_name=${module_names[$i]}
35 |         path=${paths[$i]}
36 |         cp -R "./$path" "../../$module_name"
37 |         echo "$current_date_time" > "../../$module_name/.checkoutinfo"
38 |         echo "URL: $rurl" >> "../../$module_name/.checkoutinfo"
39 |         echo "Path: $path" >> "../../$module_name/.checkoutinfo"
40 |     done
41 |     cd ../
42 | )
43 |
44 | function checkout()(
45 | # Add check out remote module calls here
46 | : # no-op: Bash rejects a function body that contains only comments
47 | # check_out_remote_module "https://github.com/mzaks/mojo-trees" "fiby_tree"
48 | )
49 |
50 | mkdir -p "_deps"
51 | cd "_deps" || exit 1
52 | # Only reached from inside _deps, so the clones and the cleanup below stay scoped to it.
53 | checkout
54 |
55 | rm -rf "../_deps"
--------------------------------------------------------------------------------
/fxhash/fxhash.mojo:
--------------------------------------------------------------------------------
1 | from bit import rotate_bits_left
2 |
3 | alias ROTATE = 5
4 | alias SEED64 = 0x51_7c_c1_b7_27_22_0a_95
5 | alias SEED32 = 0x9e_37_79_b9
6 |
7 | @always_inline
8 | fn fxhash32(s: String, seed: UInt32 = 0) -> UInt32:  # FxHash with a 32-bit state
9 |     var state = seed
10 |     var remaining = len(s)
11 |     var cursor = UnsafePointer(s.unsafe_ptr())
12 |     while remaining >= 4:  # whole 4-byte words first
13 |         state = _hash_word32(state, cursor.bitcast[DType.uint32]().load())
14 |         cursor = cursor.offset(4)
15 |         remaining -= 4
16 |     if remaining >= 2:  # then a 2-byte piece, widened to 32 bits
17 |         state = _hash_word32(state, cursor.bitcast[DType.uint16]().load().cast[DType.uint32]())
18 |         cursor = cursor.offset(2)
19 |         remaining -= 2
20 |     if remaining > 0:  # finally the single leftover byte
21 |         state = _hash_word32(state, cursor.load().cast[DType.uint32]())
22 |     return state
23 |
24 | @always_inline
25 | fn fxhash64(s: String, seed: UInt64 = 0) -> UInt64:  # FxHash with a 64-bit state
26 |     var state = seed
27 |     var remaining = len(s)
28 |     var cursor = UnsafePointer(s.unsafe_ptr())
29 |     while remaining >= 8:  # whole 8-byte words first
30 |         state = _hash_word64(state, cursor.bitcast[DType.uint64]().load())
31 |         cursor = cursor.offset(8)
32 |         remaining -= 8
33 |     if remaining >= 4:  # then a 4-byte piece, widened to 64 bits
34 |         state = _hash_word64(state, cursor.bitcast[DType.uint32]().load().cast[DType.uint64]())
35 |         cursor = cursor.offset(4)
36 |         remaining -= 4
37 |     if remaining >= 2:  # then a 2-byte piece
38 |         state = _hash_word64(state, cursor.bitcast[DType.uint16]().load().cast[DType.uint64]())
39 |         cursor = cursor.offset(2)
40 |         remaining -= 2
41 |     if remaining > 0:  # finally the single leftover byte
42 |         state = _hash_word64(state, cursor.load().cast[DType.uint64]())
43 |     return state
44 |
45 |
46 | @always_inline
47 | fn _hash_word32(value: UInt32, word: UInt32) -> UInt32:  # one mixing step: fold `word` into the running state `value`
48 | return (rotate_bits_left[ROTATE](value) ^ word) * SEED32  # rotate the state left, xor the word in, multiply by SEED32
49 |
50 | @always_inline
51 | fn _hash_word64(value: UInt64, word: UInt64) -> UInt64:  # one mixing step: fold `word` into the running state `value`
52 | return (rotate_bits_left[ROTATE](value) ^ word) * SEED64  # rotate the state left, xor the word in, multiply by SEED64
53 |
--------------------------------------------------------------------------------
/benchmark_other_languages/c/hash_functions/xxhash.c:
--------------------------------------------------------------------------------
1 | /*
2 | * xxHash - Extremely Fast Hash algorithm
3 | * Copyright (C) 2012-2023 Yann Collet
4 | *
5 | * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are
9 | * met:
10 | *
11 | * * Redistributions of source code must retain the above copyright
12 | * notice, this list of conditions and the following disclaimer.
13 | * * Redistributions in binary form must reproduce the above
14 | * copyright notice, this list of conditions and the following disclaimer
15 | * in the documentation and/or other materials provided with the
16 | * distribution.
17 | *
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | *
30 | * You can contact the author at:
31 | * - xxHash homepage: https://www.xxhash.com
32 | * - xxHash source repository: https://github.com/Cyan4973/xxHash
33 | */
34 |
35 | /*
36 | * xxhash.c instantiates functions defined in xxhash.h
37 | */
38 |
39 | #define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
40 | #define XXH_IMPLEMENTATION /* access definitions */
41 |
42 | #include "xxhash.h"
--------------------------------------------------------------------------------
/test_sha256.mojo:
--------------------------------------------------------------------------------
1 | # import time
2 | # from sha import sha256_encode
3 | # from testing import assert_equal
4 | # from collections.vector import InlinedFixedVector
5 |
6 | # fn print_hex(digest: InlinedFixedVector[UInt8, 32]):
7 | # var lookup = String("0123456789abcdef")
8 | # var result: String = ""
9 | # for i in range(len(digest)):
10 | # var v = digest[i].to_int()
11 | # result += lookup[(v >> 4)]
12 | # result += lookup[v & 15]
13 |
14 | # print(result)
15 | # print(len(digest))
16 | # print(len("b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"))
17 | # print(len("985752100505598575751521005110148569753501015350100551009755100979810297995256521011021015155975351564810110157485656102559799101501011029910010157"))
18 |
19 | # fn main():
20 | # var bytes = 1024 * 1024 * 256 + 78
21 | # var bytes_to_hash: DynamicVector[UInt8] = kinda_random_bytes(bytes)
22 | # var ptr = DTypePointer[DType.uint8](bytes_to_hash.data.value)
23 | # var buffer = Buffer[DType.uint8](ptr, bytes_to_hash.size)
24 | # var before = time.now()
25 | # var hash = sha256_encode(ptr, bytes)
26 | # var after = time.now()
27 | # var keep_vector_alive = bytes_to_hash[4]
28 | # var ns = after - before
29 | # var seconds = ns / 1_000_000_000
30 | # var megabytes = bytes / 1_000_000
31 | # for i in range(hash.size):
32 | # print(hash[i])
33 | # print("megabytes per second")
34 | # print(megabytes / seconds)
35 | # var text = "hello world"
36 | # print(text)
37 | # print_hex(sha256_encode(text.data().bitcast[DType.uint8](), len(text)))
38 |
39 |
40 | # fn kinda_random_bytes(length: Int) -> DynamicVector[UInt8]:
41 | # var vec = DynamicVector[UInt8](capacity=length)
42 | # var n: UInt8 = 245
43 | # var cycle: UInt8 = 1
44 | # for i in range(length):
45 | # var shifted = n >> 3
46 | # var shiftalso = n << 4
47 | # var more = shifted ^ n ^ shiftalso
48 | # var next = n + more
49 | # n = next
50 | # cycle ^= n
51 | # vec.append(n + cycle)
52 |
53 | # return vec
--------------------------------------------------------------------------------
/benchmark_other_languages/js/hash_functions/package-lock.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "hash_functions",
3 | "lockfileVersion": 2,
4 | "requires": true,
5 | "packages": {
6 | "": {
7 | "dependencies": {
8 | "wyhash": "1.0.0",
9 | "xxhashjs": "0.2.2"
10 | }
11 | },
12 | "node_modules/cuint": {
13 | "version": "0.2.2",
14 | "resolved": "https://registry.npmjs.org/cuint/-/cuint-0.2.2.tgz",
15 | "integrity": "sha512-d4ZVpCW31eWwCMe1YT3ur7mUDnTXbgwyzaL320DrcRT45rfjYxkt5QWLrmOJ+/UEAI2+fQgKe/fCjR8l4TpRgw=="
16 | },
17 | "node_modules/wyhash": {
18 | "version": "1.0.0",
19 | "resolved": "https://registry.npmjs.org/wyhash/-/wyhash-1.0.0.tgz",
20 | "integrity": "sha512-3mxXnm7JQTAkxyWcq+POKqUq1cU+Wd9jyhRdAHz2xGuwL1cGjK/xhr73c+/JljnKYaZmmyq6v0Vv3l6t64w8ZQ=="
21 | },
22 | "node_modules/xxhashjs": {
23 | "version": "0.2.2",
24 | "resolved": "https://registry.npmjs.org/xxhashjs/-/xxhashjs-0.2.2.tgz",
25 | "integrity": "sha512-AkTuIuVTET12tpsVIQo+ZU6f/qDmKuRUcjaqR+OIvm+aCBsZ95i7UVY5WJ9TMsSaZ0DA2WxoZ4acu0sPH+OKAw==",
26 | "dependencies": {
27 | "cuint": "^0.2.2"
28 | }
29 | }
30 | },
31 | "dependencies": {
32 | "cuint": {
33 | "version": "0.2.2",
34 | "resolved": "https://registry.npmjs.org/cuint/-/cuint-0.2.2.tgz",
35 | "integrity": "sha512-d4ZVpCW31eWwCMe1YT3ur7mUDnTXbgwyzaL320DrcRT45rfjYxkt5QWLrmOJ+/UEAI2+fQgKe/fCjR8l4TpRgw=="
36 | },
37 | "wyhash": {
38 | "version": "1.0.0",
39 | "resolved": "https://registry.npmjs.org/wyhash/-/wyhash-1.0.0.tgz",
40 | "integrity": "sha512-3mxXnm7JQTAkxyWcq+POKqUq1cU+Wd9jyhRdAHz2xGuwL1cGjK/xhr73c+/JljnKYaZmmyq6v0Vv3l6t64w8ZQ=="
41 | },
42 | "xxhashjs": {
43 | "version": "0.2.2",
44 | "resolved": "https://registry.npmjs.org/xxhashjs/-/xxhashjs-0.2.2.tgz",
45 | "integrity": "sha512-AkTuIuVTET12tpsVIQo+ZU6f/qDmKuRUcjaqR+OIvm+aCBsZ95i7UVY5WJ9TMsSaZ0DA2WxoZ4acu0sPH+OKAw==",
46 | "requires": {
47 | "cuint": "^0.2.2"
48 | }
49 | }
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/test_md5.mojo:
--------------------------------------------------------------------------------
1 | from md5 import md5_string
2 | from testing import assert_equal
3 | from wyhasher import wyhash
4 | from wyhasher.wyhasher import wymum
5 |
6 | alias alphabete: String = "0123456789abcdef"
7 |
8 | fn to_hex(v: SIMD[DType.uint8, 16]) -> String:
9 |     var hex_text: String = ""  # receives two hex digits per byte
10 |     for idx in range(16):
11 |         var byte = v[idx]
12 |         # High nibble first, then low nibble.
13 |         hex_text += alphabete[int(byte >> 4)]
14 |         hex_text += alphabete[int(byte & 15)]
15 |     return hex_text
16 |
17 | fn main() raises:
18 | var a: String = "Hello 🔥"
19 | assert_equal(to_hex(md5_string(a)), "b9735ea236e0d3103a39ad102a2e990f")
20 | _ = a
21 | var b: String = '米くを舵4物委らご氏松ハナテフ月関ソ時平ふいの博情れじフ牟万い元56園フメヤオ試図ロツヤ未備王こと傷喫羅踊んゆし。栃ユヱオ書著作ユソツロ英祉業ア大課ご権質フべ空8午キ切軟づン著郎そゃす格町採ヱオマコ処8付国ムハチア究表でなだ際無ロミヱ地兵ぴげ庭体すク発抜爆位や。楽富むゆず盛航カナセ携代ハ本高きた員59今骸ンラえぜ城解イケ穴訴ぽぎ属住ヤケトヌ抱点ト広注厚でて。 国リ出難セユメ軍手ヘカウ画形サヲシ猛85用ヲキミ心死よしと身処ケヨミオ教主ーぽ事業んく字国たさょ図能シミスヤ社8板ル岡世58次戒知院んれり。市メ誘根カ数問禁竹ゃれえみ給辺のでみき今二ぎさ裕止過こクすと無32郎所ラた生展ヌヘス成度慣葬勇厘ばてか。室ゃ下携疲ム色権がぽりっ銃週ノオ姫千テム健蔵い研手ッ放容ル告属め旅側26企サノヨ宅都福ぞ通待ちぴね種脳イど労希望義通むン。 罰しい続負せ著低たル異師ユハワ東添質コチ転集ルヤ雇聴約ヒ前統らた情厳ゆさでや真胸や有披暑棚豆ゆぼたけ。盛ワセロナ情競クるっわ講3音ずをせ少地めしぜょ手63明視れに判企ヒヌエソ求総58特本ね井比ユラキ禁頭馬るゅリす能率率かがさわ。葉サソ医郡ヱヘソ労帰ナケスミ救写ワヘ株審ネヒニミ安逮イ人画ラ涯車はラ極騒りなド件5級ンかふー劇41著ぱぐ凱討だ文世ぶづどま界善魅マ渓経競融れがや。 連ーぜらご模分ッ視外ばフく運発群ほぼづ育越一ほごクけ案募ヲイソ治会イせフ製君ぜた漢村1変リヒ構5際ツ御文ヲ臭入さドぼ代書ハケ引技ろみれ回観注倉徹ぱ。論ラづ海要サ情座ゃり齢宣ラモエ芸化エマホ覧催回ら戦69本外ト葬岳な政画か連針ぴリフず。約ル闘辺ぽ経2応掲ホサアラ塾小コラ画決クノオ上室レヌヱ勝逮ぜるえむ責豊チノ明意ひけ訟6碁草メタチエ財午召喝塊む。 決めでわ名金つけレわ続人県約ぽぼす尾腹ユサ戦載リシ護賀レモフツ重涯ニ治者むんっみ職更カタチレ提話2何ワ責東まけげふ能政ヌ供禁がびてわ提改倶れめ。読み担後ぽ安加ぎ論鹿ツ統最お気麻月つじもあ竜思いろめ判必満理トコ文連ムイウハ寄串ざほびー。文ゆこっ向27年メイ便能ノセヲ待1王スねたゆ伝派んね点過カト治読よにきべ使人スシ都言え阻8割べづえみ注引敷的岳犠眠どそ。 学用イだ医客開ロ供界もぞだ実隆モイヌ務坂ナコヲ権野ろづ初場ぱ低会づぱじ新倒コ化政レ止奮浸猪ッわえづ。形いやリ要帰ほまむだ業領スル必打さ島14巻リ集日ネヘホタ面幅ち写上そぴ円図ムタコモ報使イわざと会催ヤヲ康証をドぶレ盤岡ホハツ作29管しをめ公問懐蓄っさ。来ゆぼあぱ投秋シ語右ぐ身靖かば辛握捕家記ヘワ神岐囲づ毘観メテクツ政73夕罪57需93誌飲査仁さ。 変レめ束球よんま会特ヱコ聞重だ史純ーどる件32浦レぴよゃ上強ネラリロ査従セユヤ専棋光レ作表ひぶ予正ぜーな誉確フス函6報円ス進治ね能営済否雄でわょ。42生型ば着続ア短実ぎおめび前環闘ラヤヲル診均っとにの声公トヱテマ整試椅情久妊舌頃ざとっく。品キチトテ阿国ラら受87世ヲフセリ川86個ーょぼげ危子ヘレカメ無会ぱかへ事通んかて電条ロツ徴商ぶぞそを居暑メ害広せもがり禁応レミヲ応響割壮憶はぱ。 千れンが織財メニ況界ネトレミ学豊フオホシ近月レたやご的罪ょな菱技ちる警栗エセ提89林危氷48参ア説森クキヒヱ薬社ホコエリ負和ルび紀下ケミイ掲歳特ごず扱底ク護木連ちクを各形ばすか。変ぱなれ町7融ヌ街準以タユヘム質裕ぶで遺語俊ぎずょ事金文キ写多山ーゆに歩帯すで会世クぜよ論写ヲ達71林危氷5間続ぎぜび高怠す。 係8青け応著ミ戦条ナヘネカ思79未ぎ算伊をゃ泉人ーづ需説っ畑鹿27軽ラソツ権2促千護ルロナカ開国ケ暴嶋ご池表だ。佐フナ訪麻はてせば勝効をあ医戦画とさわぴ者両すいあ並来んば載食ぴ件友頂業へえぞ魚祝ネラ聞率スコリケ始全ンこび夫出ドふ今布うぎふゅ実克即哉循やしんな。 暮す備54依紀てッん末刊と柔称むてス無府ケイ変壌をぱ汁連フマス海世ヌ中負知問ナヘケ純推ひ読着ヒ言若私軽れ。掲けフむ王本オコ線人をっさ必和断セソヲハ図芸ちかな防長りぶは投新意相ツ並5余セ職岳ぞ端古空援そ。森ヨエチ題5東っ自兄ち暴5近鹿横ト的京ハ安氷ナキ深際ぎ並節くスむの権工ほルせ京49効タムチ処三ぞぴラ済国ずっ文経ヘトミ水分準そが。'
22 | assert_equal(to_hex(md5_string(b)), "168f7f85febeb19dbad38502499ea1d0")
23 | _ = b
24 |
--------------------------------------------------------------------------------
/benchmark_other_languages/c/hash_functions/xxh3.h:
--------------------------------------------------------------------------------
1 | /*
2 | * xxHash - Extremely Fast Hash algorithm
3 | * Development source file for `xxh3`
4 | * Copyright (C) 2019-2021 Yann Collet
5 | *
6 | * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
7 | *
8 | * Redistribution and use in source and binary forms, with or without
9 | * modification, are permitted provided that the following conditions are
10 | * met:
11 | *
12 | * * Redistributions of source code must retain the above copyright
13 | * notice, this list of conditions and the following disclaimer.
14 | * * Redistributions in binary form must reproduce the above
15 | * copyright notice, this list of conditions and the following disclaimer
16 | * in the documentation and/or other materials provided with the
17 | * distribution.
18 | *
19 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | * You can contact the author at:
32 | * - xxHash homepage: https://www.xxhash.com
33 | * - xxHash source repository: https://github.com/Cyan4973/xxHash
34 | */
35 |
36 | /*
37 | * Note: This file used to host the source code of XXH3_* variants.
38 | * during the development period.
39 | * The source code is now properly integrated within xxhash.h.
40 | *
41 | * xxh3.h is no longer useful,
42 | * but it is still provided for compatibility with source code
43 | * which used to include it directly.
44 | *
45 | * Programs are now highly discouraged to include xxh3.h.
46 | * Include `xxhash.h` instead, which is the officially supported interface.
47 | *
48 | * In the future, xxh3.h will start to generate warnings, then errors,
49 | * then it will be removed from source package and from include directory.
50 | */
51 |
52 | /* Simulate the same impact as including the old xxh3.h source file */
53 |
54 | #define XXH_INLINE_ALL
55 | #include "xxhash.h"
--------------------------------------------------------------------------------
/benchmark_hash_words_file.mojo:
--------------------------------------------------------------------------------
1 | from time import now
2 | from md5 import md5_string
3 | from wyhasher import wyhash
4 | from ahasher import ahash
5 | from fxhash import fxhash64
6 | from sha import sha256_encode
7 |
8 | import benchmark
9 | from benchmark import Unit
10 | from pathlib import Path
11 | from collections.vector import InlinedFixedVector
12 |
13 | fn to_hex(digest: InlinedFixedVector[UInt8, 32]) -> String:
14 |     # Lowercase hex rendering of `digest`, two characters per byte.
15 |     var hex_text: String = ""
16 |     var digits = String("0123456789abcdef")
17 |     for idx in range(len(digest)):
18 |         var byte = int(digest[idx])
19 |         hex_text += digits[byte >> 4] + digits[byte & 15]
20 |     return hex_text
21 |
22 | fn to_hex(digest: SIMD[DType.uint8, 16]) -> String:
23 |     # Lowercase hex rendering of `digest`, two characters per byte.
24 |     var hex_text: String = ""
25 |     var digits = String("0123456789abcdef")
26 |     for idx in range(len(digest)):
27 |         var byte = int(digest[idx])
28 |         hex_text += digits[byte >> 4] + digits[byte & 15]
29 |     return hex_text
30 |
31 | fn main() raises:  # one-shot timing of each hash over the word list, then benchmark.run reports
32 |     var text = Path("/usr/share/dict/words").read_text()
33 |     var tik = now()
34 |     var h0 = md5_string(text)
35 |     var tok = now()
36 |     print("MD5 :", tok - tik, to_hex(h0), len(text))
37 |
38 |     tik = now()
39 |     var h5 = sha256_encode(text.unsafe_ptr(), len(text))  # pass the real length; 0 hashed an empty input
40 |     tok = now()
41 |     print("SHA256 :", tok - tik, to_hex(h5), len(text))
42 |
43 |     tik = now()
44 |     var h1 = wyhash(text, 0)
45 |     tok = now()
46 |     print("Wyhash :", tok - tik, h1, len(text))
47 |
48 |     tik = now()
49 |     var h2 = ahash(text)
50 |     tok = now()
51 |     print("Ahash :", tok - tik, h2, len(text))
52 |
53 |     tik = now()
54 |     var h3 = fxhash64(text)
55 |     tok = now()
56 |     print("Fxhash :", tok - tik, h3, len(text))
57 |
58 |     tik = now()
59 |     var h4 = hash(text.unsafe_ptr(), len(text))
60 |     tok = now()
61 |     print("Std hash:", tok - tik, h4, len(text))
62 |
63 |     var hb = SIMD[DType.uint8, 16]()  # written by md5_test and printed after its report
64 |
65 |     @parameter
66 |     fn md5_test():
67 |         hb = md5_string(text)
68 |     print("===MD5===")
69 |     var report0 = benchmark.run[md5_test]()
70 |     report0.print(Unit.ns)
71 |     print(hb)
72 |
73 |     var hi = 0
74 |
75 |     @parameter
76 |     fn hash_test():
77 |         hi = hash(text.unsafe_ptr(), len(text))
78 |
79 |     print("===Std hash===")
80 |     var report1 = benchmark.run[hash_test]()
81 |     report1.print(Unit.ns)
82 |     print(hi)
83 |
84 |     var hu = UInt64(0)  # shared result slot for the ahash and wyhash closures
85 |
86 |     @parameter
87 |     fn ahash_test():
88 |         hu = ahash(text)
89 |
90 |     print("===Ahash===")
91 |     var report2 = benchmark.run[ahash_test]()
92 |     report2.print(Unit.ns)
93 |     print(hu)
94 |
95 |     @parameter
96 |     fn wyhash_test():
97 |         hu = wyhash(text, 0)
98 |
99 |     print("===Wyhash===")
100 |     var report3 = benchmark.run[wyhash_test]()
101 |     report3.print(Unit.ns)
102 |     print(hu)
103 |
104 |     _ = text  # keep `text` alive while raw pointers into it are in use
105 |
--------------------------------------------------------------------------------
/benchmark_other_languages/c/hash_functions/benchmark_wyhash.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #ifndef XXH3
10 | #include "wyhash.h"
11 | #else
12 | #include "xxh3.h"
13 | #endif
14 |
15 | using namespace std;
16 |
17 | struct ha
18 | {
19 | size_t operator()(const string &s, uint64_t seed)const
20 | {
21 | #ifndef XXH3
22 | return wyhash(s.c_str(),s.size(),seed,_wyp);
23 | #else
24 | return XXH3_64bits_withSeed(s.c_str(),s.size(),seed);
25 | #endif
26 | }
27 | };
28 |
29 | vector v;
30 | template
31 | uint64_t bench_hash(const char *name)
32 | {
33 | Hasher h;
34 | string s;
35 | timeval beg, end;
36 | uint64_t dummy=0;
37 | const uint64_t N=v.size(), R=0x1000;
38 |
39 | cerr.precision(2);
40 | cerr.setf(ios::fixed);
41 | cerr<<'|'<1)
70 | {
71 | if(help_s.compare(argv[1])==0)
72 | {
73 | cout<<"usage:\n"<\n";
74 | cout<<"if no arguments given \'"<>s; !fi.eof(); fi>>s)
87 | if(s.size())
88 | v.push_back(s);
89 | fi.close();
90 | //shuffle the array to benchmark random access
91 | for(size_t i=v.size()-1; i; i--)
92 | swap(v[i],v[rand()%(i+1)]);
93 |
94 | uint64_t r=0;
95 | cerr<("wyhash");
101 | #else
102 | r+=bench_hash("xxh3");
103 | #endif
104 |
105 | return r;
106 | }
--------------------------------------------------------------------------------
/wyhasher/wyhasher.mojo:
--------------------------------------------------------------------------------
1 |
2 | from bit import bit_width, byte_swap
3 | from bit import rotate_bits_right
4 |
# Two 64-bit lanes; used to process a pair of operands with one vector op.
alias U128 = SIMD[DType.uint64, 2]
# Four 64-bit lanes; the shape of the `secret` argument of `wyhash`.
alias U256 = SIMD[DType.uint64, 4]
# Secret `wyhash` falls back to when the caller does not supply one.
alias default_secret = SIMD[DType.uint64, 4](0x2d358dccaa6c78a5, 0x8bb84b93962eacc9, 0x4b33a62ed433d4a3, 0x4d5a2da51de1aa47)
8 |
@always_inline
fn wymum_32(inout a: UInt64, inout b: UInt64):
    """Mixes `a` and `b` in place from their four 32x32-bit partial products.

    On return `a` holds `rot32(hi(a) * lo(b)) ^ (hi(a) * hi(b))` and `b` holds
    `rot32(lo(a) * hi(b)) ^ (lo(a) * lo(b))`, where `hi`/`lo` are the upper and
    lower 32 bits of the original operands.

    Args:
        a: First operand; overwritten with the first mixed word.
        b: Second operand; overwritten with the second mixed word.
    """
    var operands = U128(a, b)
    var upper = operands >> 32
    var lower = operands & 0xff_ff_ff_ff

    # Lane-wise products: reduce_mul multiplies lane 0 by lane 1.
    var upper_upper = upper.reduce_mul()
    var lower_lower = lower.reduce_mul()
    var upper_lower = upper[0] * lower[1]
    var lower_upper = lower[0] * upper[1]

    a = rotate_bits_right[32](upper_lower) ^ upper_upper
    b = rotate_bits_right[32](lower_upper) ^ lower_lower
19 |
@always_inline
fn wymum(inout a: UInt64, inout b: UInt64):
    """Replaces `a` and `b` with the low and high halves of their 128-bit product.

    The product is assembled from four 32x32-bit partial products with explicit
    carry tracking, so no 128-bit integer type is needed.

    Args:
        a: First factor; overwritten with the low 64 bits of `a * b`.
        b: Second factor; overwritten with the high 64 bits of `a * b`.
    """
    var operands = U128(a, b)
    var upper = operands >> 32
    var lower = operands & 0xff_ff_ff_ff

    var upper_upper = upper.reduce_mul()
    var lower_lower = lower.reduce_mul()
    var upper_lower = upper[0] * lower[1]
    var lower_upper = lower[0] * upper[1]

    # Add both cross terms into the low word, counting each wrap-around.
    var partial = lower_lower + (upper_lower << 32)
    var carry = (partial < lower_lower).cast[DType.uint64]()
    var low_word = partial + (lower_upper << 32)
    carry += (low_word < partial).cast[DType.uint64]()

    var high_word = upper_upper + (upper_lower >> 32) + (lower_upper >> 32) + carry
    a = low_word
    b = high_word
35 |
@always_inline
fn wy_mix(_a: UInt64, _b: UInt64) -> UInt64:
    """Returns the XOR of the two words produced by `wymum(_a, _b)`.

    Args:
        _a: First input word.
        _b: Second input word.

    Returns:
        Low half of the product XOR high half of the product.
    """
    var low = _a
    var high = _b
    wymum(low, high)
    return low ^ high
42 |
@always_inline
fn wyr8(p: UnsafePointer[UInt8]) -> UInt64:
    """Loads the 8 bytes starting at `p` as one native-endian `UInt64`."""
    var as_words = p.bitcast[DType.uint64]()
    return as_words.load()
46 |
@always_inline
fn wyr4(p: UnsafePointer[UInt8]) -> UInt64:
    """Loads the 4 bytes starting at `p` as a `UInt32` and widens it to `UInt64`."""
    var half_word = p.bitcast[DType.uint32]().load()
    return half_word.cast[DType.uint64]()
50 |
@always_inline
fn wyr3(p: UnsafePointer[UInt8], k: Int) -> UInt64:
    """Packs the first, middle and last byte of a `k`-byte input into one word.

    Args:
        p: Start of the input bytes.
        k: Number of readable bytes; callers in this file pass 1 to 3.

    Returns:
        `p[0] << 16 | p[k >> 1] << 8 | p[k - 1]`.
    """
    var first = p.load().cast[DType.uint64]()
    var middle = p.offset(k >> 1).load().cast[DType.uint64]()
    var last = p.offset(k - 1).load().cast[DType.uint64]()
    return (first << 16) | (middle << 8) | last
56 |
fn wyhash(key: String, _seed: UInt64, secret: U256 = default_secret) -> UInt64:
    """Computes a 64-bit wyhash of the bytes of `key`.

    Args:
        key: String whose `len(key)` bytes are hashed.
        _seed: Caller-supplied seed, pre-mixed with the secret before use.
        secret: Four 64-bit secret words; defaults to `default_secret`.

    Returns:
        The 64-bit hash value.
    """
    var total_length = len(key)
    var remaining = total_length
    var cursor = UnsafePointer(key.unsafe_ptr())
    var seed = _seed ^ wy_mix(_seed ^ secret[0], secret[1])
    var a: UInt64 = 0
    var b: UInt64 = 0

    if remaining > 16:
        # Long input: consume 48-byte stripes through three independent lanes.
        var lane1 = seed
        var lane2 = seed
        while remaining >= 48:
            seed = wy_mix(wyr8(cursor) ^ secret[1], wyr8(cursor + 8) ^ seed)
            lane1 = wy_mix(wyr8(cursor + 16) ^ secret[2], wyr8(cursor + 24) ^ lane1)
            lane2 = wy_mix(wyr8(cursor + 32) ^ secret[3], wyr8(cursor + 40) ^ lane2)
            cursor = cursor.offset(48)
            remaining -= 48

        # A no-op when the stripe loop never ran (both lanes still equal the seed).
        seed ^= lane1 ^ lane2

        # Then 16-byte blocks until at most 16 bytes are left.
        while remaining > 16:
            seed = wy_mix(wyr8(cursor) ^ secret[1], wyr8(cursor + 8) ^ seed)
            cursor = cursor.offset(16)
            remaining -= 16

        # The final 16 bytes of the input; may overlap bytes already consumed.
        a = wyr8(cursor.offset(remaining - 16))
        b = wyr8(cursor.offset(remaining - 8))
    elif remaining >= 4:
        # 4..16 bytes: four possibly overlapping 32-bit reads cover the input.
        var middle = (remaining >> 3) << 2
        a = (wyr4(cursor) << 32) | wyr4(cursor.offset(middle))
        b = (wyr4(cursor.offset(remaining - 4)) << 32) | wyr4(cursor.offset(remaining - 4 - middle))
    elif remaining > 0:
        # 1..3 bytes; b stays 0.
        a = wyr3(cursor, remaining)

    a ^= secret[1]
    b ^= seed
    wymum(a, b)

    return wy_mix(a ^ secret[0] ^ total_length, b ^ secret[1])
99 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
--------------------------------------------------------------------------------
/ahasher/ahasher.mojo:
--------------------------------------------------------------------------------
1 | from bit import byte_swap
2 | from bit import rotate_bits_left
3 |
# Four 64-bit lanes; the shape of the key passed to `AHasher.__init__`.
alias U256 = SIMD[DType.uint64, 4]
# Two 64-bit lanes; used for 16-byte blocks and for `AHasher.extra_keys`.
alias U128 = SIMD[DType.uint64, 2]
# Multiplier applied by `AHasher.update` and `AHasher.write`.
alias MULTIPLE = 6364136223846793005
# Left-rotation amount applied to the buffer in `AHasher.large_update`.
alias ROT = 23
8 |
9 |
@always_inline
fn folded_multiply(s: UInt64, by: UInt64) -> UInt64:
    """Combines two 64-bit words using two wrapping multiplies and byte swaps.

    Args:
        s: First word.
        by: Second word.

    Returns:
        `(s * bswap(by)) ^ bswap(bswap(s) * ~by)`.
    """
    var forward = s * byte_swap(by)
    var mirrored = byte_swap(byte_swap(s) * (~by))
    return forward ^ mirrored
15 |
16 |
@always_inline
fn read_small(data: UnsafePointer[UInt8], length: Int) -> U128:
    """Reads a short input into two 64-bit lanes.

    Args:
        data: Start of the input bytes.
        length: Number of readable bytes; `ahash` and `AHasher.write` call this
            with at most 8.

    Returns:
        - length >= 4: (first 4 bytes, last 4 bytes), each read as a UInt32.
        - length 2..3: (first 2 bytes read as a UInt16, last byte).
        - length 1: the single byte in both lanes.
        - length 0: (0, 0).
    """
    if length >= 4:
        var head = data.bitcast[DType.uint32]().load().cast[DType.uint64]()
        var tail = data.offset(length - 4).bitcast[DType.uint32]().load().cast[DType.uint64]()
        return U128(head, tail)

    if length >= 2:
        var head = data.bitcast[DType.uint16]().load().cast[DType.uint64]()
        var tail = data.offset(length - 1).load().cast[DType.uint64]()
        return U128(head, tail)

    if length > 0:
        var only = data.load().cast[DType.uint64]()
        return U128(only, only)

    return U128(0, 0)
35 |
struct AHasher:
    """Hasher state used by `ahash`.

    `buffer` is the running state, `pad` is combined with it when finishing,
    and `extra_keys` whitens 16-byte blocks in `large_update`.
    """

    var buffer: UInt64
    var pad: UInt64
    var extra_keys: U128

    fn __init__(inout self, key: U256):
        """Initialises the state from `key` XORed with four fixed constants.

        Args:
            key: Four 64-bit key words; lanes 0 and 1 seed `buffer` and `pad`,
                lanes 2 and 3 seed `extra_keys`.
        """
        var pi_key = key ^ U256(0x243f_6a88_85a3_08d3, 0x1319_8a2e_0370_7344, 0xa409_3822_299f_31d0, 0x082e_fa98_ec4e_6c89,)
        self.buffer = pi_key[0]
        self.pad = pi_key[1]
        self.extra_keys = U128(pi_key[2], pi_key[3])

    @always_inline
    fn update(inout self, new_data: UInt64):
        """Folds one 64-bit word into `buffer`."""
        self.buffer = folded_multiply(new_data ^ self.buffer, MULTIPLE)

    @always_inline
    fn large_update(inout self, new_data: U128):
        """Folds a 16-byte block (two 64-bit lanes) into `buffer`."""
        var combined = folded_multiply(
            new_data[0] ^ self.extra_keys[0], new_data[1] ^ self.extra_keys[1]
        )
        self.buffer = rotate_bits_left[ROT]((self.buffer + self.pad) ^ combined)

    @always_inline
    fn short_finish(self) -> UInt64:
        """Returns `buffer + pad` without further mixing."""
        return self.buffer + self.pad

    @always_inline
    fn finish(self) -> UInt64:
        """Returns the final hash: `folded_multiply(buffer, pad)` rotated left
        by the low 6 bits of `buffer`.
        """
        var rot = self.buffer & 63
        var folded = folded_multiply(self.buffer, self.pad)
        # Mask the complementary count: when rot == 0 the unmasked expression
        # would shift right by 64, i.e. by the full bit width, which is not a
        # well-defined shift. For rot in 1..63 the mask changes nothing.
        return (folded << rot) | (folded >> ((64 - rot) & 63))

    @always_inline
    fn write(inout self, data: UnsafePointer[UInt8], length: Int):
        """Absorbs `length` bytes starting at `data`.

        Args:
            data: Start of the input bytes.
            length: Number of readable bytes at `data`.
        """
        # The length is mixed in first, before any data.
        self.buffer = (self.buffer + length) * MULTIPLE
        if length > 8:
            if length > 16:
                # Absorb the last 16 bytes first, then walk forward in 16-byte
                # blocks; the final bytes are covered by the tail read.
                var tail = data.offset(length - 16).bitcast[DType.uint64]().load[width=2]()
                self.large_update(tail)
                var offset = 0
                while length - offset > 16:
                    var block = data.offset(offset).bitcast[DType.uint64]().load[width=2]()
                    self.large_update(block)
                    offset += 16
            else:
                # 9..16 bytes: two 8-byte reads that may overlap.
                var a = data.bitcast[DType.uint64]().load()
                var b = data.offset(length - 8).bitcast[DType.uint64]().load()
                self.large_update(U128(a, b))
        else:
            var value = read_small(data, length)
            self.large_update(value)
87 |
@always_inline
fn ahash(s: String) -> UInt64:
    """Hashes the bytes of `s` with an `AHasher` keyed with all zeros.

    Inputs of at most 8 bytes take a dedicated short path; longer inputs go
    through `AHasher.write`.

    Args:
        s: String to hash.

    Returns:
        The 64-bit hash value.
    """
    var hasher = AHasher(U256(0, 0, 0, 0))
    var data = s.unsafe_ptr()
    var size = len(s)

    if size <= 8:
        var small = read_small(data, size)
        hasher.buffer = folded_multiply(small[0] ^ hasher.buffer, small[1] ^ hasher.extra_keys[1])
        hasher.pad = hasher.pad + size
    else:
        hasher.write(data, size)

    return hasher.finish()
102 |
--------------------------------------------------------------------------------
/benchmark_other_languages/swift/hash_functions/.swiftpm/xcode/xcshareddata/xcschemes/hash_functions.xcscheme:
--------------------------------------------------------------------------------
1 |
2 |
5 |
8 |
9 |
15 |
21 |
22 |
23 |
29 |
35 |
36 |
37 |
38 |
39 |
45 |
46 |
48 |
54 |
55 |
56 |
57 |
58 |
68 |
70 |
76 |
77 |
78 |
79 |
85 |
86 |
92 |
93 |
94 |
95 |
97 |
98 |
101 |
102 |
103 |
--------------------------------------------------------------------------------
/md5/md5.mojo:
--------------------------------------------------------------------------------
1 | # Based on https://github.com/Zunawe/md5-c
2 |
3 | from utils.loop import unroll
4 | from memory.unsafe import bitcast
5 | from memory import memset_zero
6 | from bit import rotate_bits_left
7 |
# Left-rotation amount for each of the 64 rounds; `Md5Context.step` reads S[i]
# for round i.
alias S = SIMD[DType.uint32, 64](
    7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
    5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,
    4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
    6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21
)

# Additive constant for each of the 64 rounds; `Md5Context.step` adds K[i] in
# round i.
alias K = SIMD[DType.uint32, 64](
    0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
    0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
    0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
    0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
    0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
    0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
    0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
    0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
    0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
    0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
    0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
    0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
    0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
    0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
    0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
    0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
)

# 64-byte padding block (0x80 followed by zeros) built by `create_padding`;
# `Md5Context.finalize` feeds a prefix of it to `update`.
alias PADDING = create_padding()
35 |
fn create_padding() -> UnsafePointer[UInt8]:
    """Allocates a 64-byte buffer holding 0x80 followed by 63 zero bytes.

    Returns:
        Pointer to the newly allocated buffer; this function never frees it.
    """
    var block = UnsafePointer[UInt8].alloc(64)
    var index = 63
    while index > 0:
        block.store(index, 0)
        index -= 1
    block.store(0, 0x80)
    return block
42 |
struct Md5Context:
    """Incremental MD5 state.

    `buffer` holds the four 32-bit chaining words, `input` the current
    partially filled 64-byte block, and `size` the number of bytes absorbed so
    far. `digest` is initialised but not otherwise used in this struct.
    """

    var buffer: SIMD[DType.uint32, 4]
    var input: SIMD[DType.uint8, 64]
    var digest: SIMD[DType.uint8, 16]
    var size: UInt64

    fn __init__(inout self):
        """Resets the byte counter and loads the initial chaining words."""
        self.size = 0
        self.buffer = SIMD[DType.uint32, 4](0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476)
        self.input = SIMD[DType.uint8, 64]()
        self.digest = SIMD[DType.uint8, 16]()

    @always_inline
    fn update(inout self, input_buffer: UnsafePointer[UInt8], length: Int):
        """Absorbs `length` bytes, running `step` each time a 64-byte block fills.

        Args:
            input_buffer: Start of the bytes to absorb.
            length: Number of readable bytes at `input_buffer`.
        """
        var write_index = int(self.size & 63)
        self.size += length

        for i in range(length):
            self.input[write_index] = input_buffer.offset(i).load()
            write_index += 1
            if write_index == 64:
                # TODO: check if it works on BigEndian arch (or needs bswap?)
                self.step(bitcast[DType.uint32, 16](self.input))
                write_index = 0

    @always_inline
    fn finalize(owned self) -> SIMD[DType.uint8, 16]:
        """Pads the message, appends its bit length and returns the digest.

        Returns:
            The 16 digest bytes, i.e. the chaining words reinterpreted as bytes.
        """
        var offset = int(self.size & 63)
        # Pad so that exactly 8 bytes remain in the block for the length field.
        var padding_length = 56 + 64 - offset
        if offset < 56:
            padding_length = 56 - offset

        self.update(PADDING, padding_length)
        # update() counted the padding; only message bytes belong in the length.
        self.size -= padding_length

        var bit_count = self.size * 8
        var last_block = bitcast[DType.uint32, 16](self.input)
        last_block[14] = bit_count.cast[DType.uint32]()
        last_block[15] = (bit_count >> 32).cast[DType.uint32]()
        self.step(last_block)
        return bitcast[DType.uint8, 16](self.buffer)

    @always_inline
    fn step(inout self, input: SIMD[DType.uint32, 16]):
        """Runs the 64 compression rounds on one block and adds the result to `buffer`.

        Args:
            input: The 64-byte block as sixteen 32-bit words.
        """
        var aa = self.buffer[0]
        var bb = self.buffer[1]
        var cc = self.buffer[2]
        var dd = self.buffer[3]

        var e: UInt32 = 0
        var j = 0

        @parameter
        fn shuffle[i: Int]():
            # Rounds come in four groups of 16; each group has its own boolean
            # function and its own message-word schedule.
            alias stage = i >> 4
            @parameter
            if stage == 0:
                e = (bb & cc) | (~bb & dd)
                j = i
            elif stage == 1:
                e = (bb & dd) | (cc & ~dd)
                j = (i * 5 + 1) & 15
            elif stage == 2:
                e = bb ^ cc ^ dd
                j = (i * 3 + 5) & 15
            else:
                e = cc ^ (bb | ~dd)
                j = (i * 7) & 15
            var rotated = rotate_bits_left[int(S[i])](aa + e + K[i] + input[j])
            # Rotate the working registers: (a, b, c, d) <- (d, b + rotated, b, c).
            aa = dd
            dd = cc
            cc = bb
            bb = bb + rotated

        unroll[shuffle, 64]()

        self.buffer += SIMD[DType.uint32, 4](aa, bb, cc, dd)
115 |
@always_inline
fn md5_string(value: String) -> SIMD[DType.uint8, 16]:
    """Computes the MD5 digest of the `len(value)` bytes of `value`.

    Args:
        value: String to hash.

    Returns:
        The 16 digest bytes.
    """
    var context = Md5Context()
    var data = value.unsafe_ptr()
    context.update(data, len(value))
    return context^.finalize()
121 |
--------------------------------------------------------------------------------
/benchmark_other_languages/rust/hash_functions/Cargo.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Cargo.
2 | # It is not intended for manual editing.
3 | version = 3
4 |
5 | [[package]]
6 | name = "ahash"
7 | version = "0.8.6"
8 | source = "registry+https://github.com/rust-lang/crates.io-index"
9 | checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a"
10 | dependencies = [
11 | "cfg-if",
12 | "getrandom",
13 | "once_cell",
14 | "version_check",
15 | "zerocopy",
16 | ]
17 |
18 | [[package]]
19 | name = "byteorder"
20 | version = "1.5.0"
21 | source = "registry+https://github.com/rust-lang/crates.io-index"
22 | checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
23 |
24 | [[package]]
25 | name = "cfg-if"
26 | version = "1.0.0"
27 | source = "registry+https://github.com/rust-lang/crates.io-index"
28 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
29 |
30 | [[package]]
31 | name = "fxhash"
32 | version = "0.2.1"
33 | source = "registry+https://github.com/rust-lang/crates.io-index"
34 | checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
35 | dependencies = [
36 | "byteorder",
37 | ]
38 |
39 | [[package]]
40 | name = "getrandom"
41 | version = "0.2.11"
42 | source = "registry+https://github.com/rust-lang/crates.io-index"
43 | checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f"
44 | dependencies = [
45 | "cfg-if",
46 | "libc",
47 | "wasi",
48 | ]
49 |
50 | [[package]]
51 | name = "hash_functions"
52 | version = "0.1.0"
53 | dependencies = [
54 | "ahash",
55 | "fxhash",
56 | "md5",
57 | "rand",
58 | "wyhash2",
59 | ]
60 |
61 | [[package]]
62 | name = "libc"
63 | version = "0.2.151"
64 | source = "registry+https://github.com/rust-lang/crates.io-index"
65 | checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4"
66 |
67 | [[package]]
68 | name = "md5"
69 | version = "0.7.0"
70 | source = "registry+https://github.com/rust-lang/crates.io-index"
71 | checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
72 |
73 | [[package]]
74 | name = "no-std-compat"
75 | version = "0.4.1"
76 | source = "registry+https://github.com/rust-lang/crates.io-index"
77 | checksum = "b93853da6d84c2e3c7d730d6473e8817692dd89be387eb01b94d7f108ecb5b8c"
78 |
79 | [[package]]
80 | name = "once_cell"
81 | version = "1.19.0"
82 | source = "registry+https://github.com/rust-lang/crates.io-index"
83 | checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
84 |
85 | [[package]]
86 | name = "ppv-lite86"
87 | version = "0.2.17"
88 | source = "registry+https://github.com/rust-lang/crates.io-index"
89 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
90 |
91 | [[package]]
92 | name = "proc-macro2"
93 | version = "1.0.71"
94 | source = "registry+https://github.com/rust-lang/crates.io-index"
95 | checksum = "75cb1540fadbd5b8fbccc4dddad2734eba435053f725621c070711a14bb5f4b8"
96 | dependencies = [
97 | "unicode-ident",
98 | ]
99 |
100 | [[package]]
101 | name = "quote"
102 | version = "1.0.33"
103 | source = "registry+https://github.com/rust-lang/crates.io-index"
104 | checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
105 | dependencies = [
106 | "proc-macro2",
107 | ]
108 |
109 | [[package]]
110 | name = "rand"
111 | version = "0.8.5"
112 | source = "registry+https://github.com/rust-lang/crates.io-index"
113 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
114 | dependencies = [
115 | "libc",
116 | "rand_chacha",
117 | "rand_core",
118 | ]
119 |
120 | [[package]]
121 | name = "rand_chacha"
122 | version = "0.3.1"
123 | source = "registry+https://github.com/rust-lang/crates.io-index"
124 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
125 | dependencies = [
126 | "ppv-lite86",
127 | "rand_core",
128 | ]
129 |
130 | [[package]]
131 | name = "rand_core"
132 | version = "0.6.4"
133 | source = "registry+https://github.com/rust-lang/crates.io-index"
134 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
135 | dependencies = [
136 | "getrandom",
137 | ]
138 |
139 | [[package]]
140 | name = "syn"
141 | version = "2.0.42"
142 | source = "registry+https://github.com/rust-lang/crates.io-index"
143 | checksum = "5b7d0a2c048d661a1a59fcd7355baa232f7ed34e0ee4df2eef3c1c1c0d3852d8"
144 | dependencies = [
145 | "proc-macro2",
146 | "quote",
147 | "unicode-ident",
148 | ]
149 |
150 | [[package]]
151 | name = "unicode-ident"
152 | version = "1.0.12"
153 | source = "registry+https://github.com/rust-lang/crates.io-index"
154 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
155 |
156 | [[package]]
157 | name = "version_check"
158 | version = "0.9.4"
159 | source = "registry+https://github.com/rust-lang/crates.io-index"
160 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
161 |
162 | [[package]]
163 | name = "wasi"
164 | version = "0.11.0+wasi-snapshot-preview1"
165 | source = "registry+https://github.com/rust-lang/crates.io-index"
166 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
167 |
168 | [[package]]
169 | name = "wyhash2"
170 | version = "0.2.1"
171 | source = "registry+https://github.com/rust-lang/crates.io-index"
172 | checksum = "9433c7c86e328a8197038c9fc31f6e5c81f9c7bdc087d86e266680236af1af1b"
173 | dependencies = [
174 | "no-std-compat",
175 | ]
176 |
177 | [[package]]
178 | name = "zerocopy"
179 | version = "0.7.32"
180 | source = "registry+https://github.com/rust-lang/crates.io-index"
181 | checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be"
182 | dependencies = [
183 | "zerocopy-derive",
184 | ]
185 |
186 | [[package]]
187 | name = "zerocopy-derive"
188 | version = "0.7.32"
189 | source = "registry+https://github.com/rust-lang/crates.io-index"
190 | checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
191 | dependencies = [
192 | "proc-macro2",
193 | "quote",
194 | "syn",
195 | ]
196 |
--------------------------------------------------------------------------------
/benchmark_hash_functions.mojo:
--------------------------------------------------------------------------------
1 | from collections import Set
2 | from time import now
3 | from memory.unsafe import bitcast
4 | # from fiby_tree import FibyTree
5 | from my_utils import int_cmp64, int_to_str64, cmp_str, stsl, int_cmp, int_to_str, corpus1, corpus2, corpus3, corpus4, corpus5, corpus6, corpus7, corpus8
6 | from ahasher import ahash
7 | from wyhasher import wyhash
8 | from fnv1a import fnv1a64, fnv1a32
9 | from fxhash import fxhash64, fxhash32
10 | from md5 import md5_string
11 | # from rapidhash import rapid_hash
12 | from o1hash import o1_hash
13 |
@always_inline
fn std_hash64(s: String) -> UInt64:
    """Adapter exposing the built-in `hash` with the `fn(String) -> UInt64` shape
    expected by `benchamark`.
    """
    var digest: UInt64 = hash(s)
    return digest
17 |
18 |
@always_inline
fn md5_hash(s: String) -> UInt64:
    """Returns the first 8 digest bytes of `md5_string(s)` reinterpreted as a
    `UInt64`, so MD5 fits the `fn(String) -> UInt64` benchmark signature.
    """
    var digest = md5_string(s)
    var halves = bitcast[DType.uint64, 2](digest)
    return halves[0]
22 |
fn benchamark[hashfn: fn(String) -> UInt64, steps: Int = 20](corpus: List[String], name: StringLiteral, ):
    """Times a 64-bit hash function over `corpus` and prints collision ratios.

    Every word is hashed `steps` times; only the `hashfn` call itself sits
    between the two `now()` readings. Distinct full hashes and distinct hashes
    reduced to the low 9 bits are collected in lists, and each ratio printed is
    (distinct keys) / (distinct hashes).

    Parameters:
        hashfn: Hash function under test.
        steps: Number of passes over the corpus.

    Args:
        corpus: Words to hash.
        name: Label printed in front of the results.
    """
    var mod = (1 << 9)
    var bucket_mask = mod - 1
    var fs = Set[String]()
    var hashes = List[UInt64]()
    var mod_hashes = List[UInt64]()
    var total = 0
    var corpus_size = len(corpus)

    for step in range(steps):
        for word_index in range(corpus_size):
            var key = corpus[word_index]
            var started = now()
            var digest = hashfn(key)
            var finished = now()
            total += finished - started

            # Record the full hash if it has not been seen before.
            var is_new = True
            for seen in range(len(hashes)):
                if hashes[seen] == digest:
                    is_new = False
                    break
            if is_new:
                hashes.append(digest)

            # Same bookkeeping for the hash reduced to `mod` buckets.
            var bucket = digest & bucket_mask
            is_new = True
            for seen in range(len(mod_hashes)):
                if mod_hashes[seen] == bucket:
                    is_new = False
                    break
            if is_new:
                mod_hashes.append(bucket)

            if step == 0:
                fs.add(key)

    var c_avg = (total / steps) / corpus_size
    # The reported average is capped at 100000.
    var min_avg: Float64 = 100000.0
    min_avg = min(min_avg, c_avg)
    print(
        name, "avg hash compute", min_avg, "| hash colision", len(fs) / len(hashes),
        "| hash colision mod", mod, len(fs) / len(mod_hashes)
    )
63 |
fn benchamark32[hashfn: fn(String) -> UInt32, steps: Int = 20](corpus: List[String], name: StringLiteral):
    """Times a 32-bit hash function over `corpus` and prints collision ratios.

    Every word is hashed `steps` times; only the `hashfn` call itself sits
    between the two `now()` readings. Distinct full hashes and distinct hashes
    reduced to the low 9 bits are collected in lists, and each ratio printed is
    (distinct keys) / (distinct hashes).

    Parameters:
        hashfn: Hash function under test.
        steps: Number of passes over the corpus.

    Args:
        corpus: Words to hash.
        name: Label printed in front of the results.
    """
    var mod = (1 << 9)
    var bucket_mask = mod - 1
    var fs = Set[String]()
    var hashes = List[UInt32]()
    var mod_hashes = List[UInt32]()
    var total = 0
    var corpus_size = len(corpus)

    for step in range(steps):
        for word_index in range(corpus_size):
            var key = corpus[word_index]
            var started = now()
            var digest = hashfn(key)
            var finished = now()
            total += finished - started

            # Record the full hash if it has not been seen before.
            var is_new = True
            for seen in range(len(hashes)):
                if hashes[seen] == digest:
                    is_new = False
                    break
            if is_new:
                hashes.append(digest)

            # Same bookkeeping for the hash reduced to `mod` buckets.
            var bucket = digest & bucket_mask
            is_new = True
            for seen in range(len(mod_hashes)):
                if mod_hashes[seen] == bucket:
                    is_new = False
                    break
            if is_new:
                mod_hashes.append(bucket)

            if step == 0:
                fs.add(key)

    var c_avg = (total / steps) / corpus_size
    # The reported average is capped at 100000.
    var min_avg: Float64 = 100000.0
    min_avg = min(min_avg, c_avg)
    print(
        name, "avg hash compute", min_avg, "| hash colision", len(fs) / len(hashes),
        "| hash colision mod", mod, len(fs) / len(mod_hashes)
    )
104 |
105 |
fn corpus_details(corpus: List[String]):
    """Prints summary statistics for a benchmark corpus.

    Reports the word count, the number of distinct words, and the minimum,
    average and maximum key length in bytes.

    Args:
        corpus: Words to summarise.
    """
    var word_count = len(corpus)
    var fs = Set[String]()
    var min_key_size = 10000000
    var max_key_size = 0
    var total_key_size = 0
    # Visit every word, including the last one; stopping at word_count - 1 left
    # the final entry out of the unique count, the min/max and the total, while
    # the average below still divides by the full word_count.
    for i in range(word_count):
        var key = corpus[i]
        fs.add(key)
        var key_size = len(key)
        total_key_size += key_size
        min_key_size = min(min_key_size, key_size)
        max_key_size = max(max_key_size, key_size)

    var avg_key_size: Float64 = 0.0
    if word_count > 0:
        avg_key_size = total_key_size / word_count
    else:
        # Nothing was measured: report 0 rather than the search sentinel.
        min_key_size = 0

    print(
        "Word count", word_count, "| unique word count",
        len(fs),
        "| min key size", min_key_size, "| avg key size", avg_key_size, "| max key size", max_key_size
    )
127 |
128 |
fn sample_wyhash(s : String) -> UInt64:
    """Benchmark adapter: `wyhash` with seed 0 and a fixed four-word secret."""
    var secret = SIMD[DType.uint64, 4](
        0xa0761d6478bd642f,
        0xe7037ed1a0b428db,
        0x8ebc6af09c88c6e3,
        0x589965cc75374cc3,
    )
    var seed: UInt64 = 0
    return wyhash(s, seed, secret)
132 |
133 | # fn sample_rapidhash(s : String) -> UInt64:
134 | # var hash = rapid_hash(s.unsafe_ptr(), len(s))
135 | # _ = s
136 | # return hash
137 |
fn sample_fxhash64(s : String) -> UInt64:
    """Benchmark adapter: `fxhash64` with a second argument of 0."""
    var digest = fxhash64(s, 0)
    return digest
140 |
fn sample_fxhash32(s : String) -> UInt32:
    """Benchmark adapter: `fxhash32` with a second argument of 0."""
    var digest = fxhash32(s, 0)
    return digest
143 |
fn run_corpus_suite[
    steps: Int = 20, o1_steps: Int = 20, with_md5: Bool = True
](corpus: List[String], title: StringLiteral):
    """Prints the corpus summary and benchmarks every hash function on it.

    Parameters:
        steps: Passes over the corpus for every hash except o1Hash.
        o1_steps: Passes over the corpus for o1Hash.
        with_md5: Whether to include the MD5 benchmark.

    Args:
        corpus: Words to hash.
        title: Heading printed before the results.
    """
    print(title)
    corpus_details(corpus)
    benchamark[ahash, steps](corpus, "AHash")
    benchamark[sample_wyhash, steps](corpus, "Wyhash")
    # benchamark[sample_rapidhash, steps](corpus, "Rapidhash")
    benchamark32[fnv1a32, steps](corpus, "fnv1a32")
    benchamark[fnv1a64, steps](corpus, "fnv1a64")
    benchamark32[sample_fxhash32, steps](corpus, "fxHash32")
    benchamark[sample_fxhash64, steps](corpus, "fxHash64")
    benchamark[std_hash64, steps](corpus, "std_Hash64")
    benchamark[o1_hash, o1_steps](corpus, "o1Hash")

    @parameter
    if with_md5:
        benchamark[md5_hash, steps](corpus, "MD5")


fn main() raises:
    """Runs the benchmark suite on the enabled corpora."""
    run_corpus_suite(corpus1(), "\nCorpus 1")
    run_corpus_suite(corpus2(), "\nCorpus 2")

    # Corpora 3-6 are disabled; re-enable by uncommenting.
    # run_corpus_suite(corpus3(), "\nCorpus 3")
    # run_corpus_suite(corpus4(), "\nCorpus 4")
    # run_corpus_suite(corpus5(), "\nCorpus 5")
    # run_corpus_suite(corpus6(), "\nCorpus 6")

    run_corpus_suite(corpus7(), "\nCorpus 7")

    # Corpus 8 runs 3 passes per hash and skips MD5 (previously present only as
    # a commented-out single-pass call).
    # NOTE(review): o1Hash keeps the default 20 passes here, as before; this
    # looks like an oversight -- confirm and drop `o1_steps` if so.
    run_corpus_suite[steps=3, o1_steps=20, with_md5=False](corpus8(), "\nCorpus 8")
256 |
--------------------------------------------------------------------------------
/sha/sha256.mojo:
--------------------------------------------------------------------------------
1 | from memory import memcpy
2 | from collections.vector import InlinedFixedVector
3 | import time
4 |
@always_inline
fn big_endian_bytes_to_dword(
    first: UInt8, second: UInt8, third: UInt8, fourth: UInt8
) -> UInt32:
    """Assembles four bytes into a 32-bit word, most significant byte first.

    Args:
        first: Bits 31..24 of the result.
        second: Bits 23..16 of the result.
        third: Bits 15..8 of the result.
        fourth: Bits 7..0 of the result.

    Returns:
        The combined 32-bit word.
    """
    var word = first.cast[DType.uint32]()
    word = (word << 8) | second.cast[DType.uint32]()
    word = (word << 8) | third.cast[DType.uint32]()
    word = (word << 8) | fourth.cast[DType.uint32]()
    return word
14 |
15 |
@always_inline
fn big_endian_dword_to_bytes(word: UInt32) -> InlinedFixedVector[UInt8, 4]:
    """Splits a 32-bit word into its four bytes, most significant byte first.

    Args:
        word: Value to split.

    Returns:
        A 4-element vector holding bits 31..24, 23..16, 15..8 and 7..0.
    """
    var bytes = InlinedFixedVector[UInt8, 4](4)
    var shift = 24
    while shift >= 0:
        bytes.append(((word >> shift) & 255).cast[DType.uint8]())
        shift -= 8
    return bytes
28 |
29 |
30 | @always_inline
31 | fn big_endian_qword_to_bytes(word: UInt64) -> InlinedFixedVector[UInt8, 8]:
32 |     """Split `word` into its eight bytes, most significant byte first.
33 |
34 |     Args:
35 |         word: The 64-bit value to serialize.
36 |
37 |     Returns:
38 |         A vector holding the eight bytes of `word` in big-endian order.
39 |     """
40 |     var out = InlinedFixedVector[UInt8, 8](8)
41 |     out.append(((word >> 56) & 255).cast[DType.uint8]())
42 |     out.append(((word >> 48) & 255).cast[DType.uint8]())
43 |     out.append(((word >> 40) & 255).cast[DType.uint8]())
44 |     out.append(((word >> 32) & 255).cast[DType.uint8]())
45 |     out.append(((word >> 24) & 255).cast[DType.uint8]())
46 |     out.append(((word >> 16) & 255).cast[DType.uint8]())
47 |     out.append(((word >> 8) & 255).cast[DType.uint8]())
48 |     out.append((word & 255).cast[DType.uint8]())
49 |     return out
50 |
51 |
52 | # Bit rotate right by `rotations`; the count is reduced modulo 32 so 0 and 32 never shift by the full width.
53 | @always_inline
54 | fn bitrr(integer: UInt32, rotations: UInt32) -> UInt32:
55 |     return (integer >> (rotations & 31)) | (integer << ((32 - rotations) & 31))
56 |
57 | # SHA-256 round constants; round i of the compression loop in sha256_encode adds k[i].
58 | alias k = SIMD[DType.uint32, 64](
59 | 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
60 | 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
61 | 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
62 | 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
63 | 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
64 | 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
65 | 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
66 | 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2
67 | )
68 | # Initial hash state: the same eight values the SHA-256 state in sha256_encode starts from.
69 | alias h = SIMD[DType.uint32, 8](
70 | 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
71 | )
72 |
73 | # SHA-256 implementation; for reference see https://en.wikipedia.org/wiki/SHA-2#Pseudocode
74 | # Whole 64-byte chunks are read directly from the caller's memory. Only the trailing
75 | # partial chunk is copied into a small buffer so that the padding can be appended to it.
76 |
77 |
78 | @always_inline
79 | fn _sha256_compress(
80 |     inout w: InlinedFixedVector[UInt32, 64], inout state: SIMD[DType.uint32, 8]
81 | ):
82 |     """Fold the 64-byte chunk already loaded into `w[0..15]` into `state`.
83 |
84 |     Args:
85 |         w: Message schedule; words 0..15 hold the chunk, words 16..63 are overwritten.
86 |         state: The eight running hash words, updated in place.
87 |     """
88 |     # Extend the first 16 words into the remaining 48 words w[16..63] of the message schedule array.
89 |     @parameter
90 |     for i in range(16, 64):
91 |         var s0 = bitrr(w[i - 15], 7) ^ bitrr(w[i - 15], 18) ^ (w[i - 15] >> 3)
92 |         var s1 = bitrr(w[i - 2], 17) ^ bitrr(w[i - 2], 19) ^ (w[i - 2] >> 10)
93 |         w[i] = w[i - 16] + s0 + w[i - 7] + s1
94 |
95 |     var a = state[0]
96 |     var b = state[1]
97 |     var c = state[2]
98 |     var d = state[3]
99 |     var e = state[4]
100 |     var f = state[5]
101 |     var g = state[6]
102 |     # Named hh (not h) so the module-level alias `h` is not shadowed.
103 |     var hh = state[7]
104 |
105 |     @parameter
106 |     for i in range(64):
107 |         var S1 = bitrr(e, 6) ^ bitrr(e, 11) ^ bitrr(e, 25)
108 |         # ch := (e and f) xor ((not e) and g)
109 |         var ch = (e & f) ^ (~e & g)
110 |         var temp1 = hh + S1 + ch + k[i] + w[i]
111 |         var S0 = bitrr(a, 2) ^ bitrr(a, 13) ^ bitrr(a, 22)
112 |         # maj := (a and b) xor (a and c) xor (b and c)
113 |         var maj = (a & b) ^ (a & c) ^ (b & c)
114 |         var temp2 = S0 + maj
115 |
116 |         hh = g
117 |         g = f
118 |         f = e
119 |         e = d + temp1
120 |         d = c
121 |         c = b
122 |         b = a
123 |         a = temp1 + temp2
124 |
125 |     # Add the compressed chunk to the running hash value (32-bit adds, as in the reference pseudocode).
126 |     state += SIMD[DType.uint32, 8](a, b, c, d, e, f, g, hh)
127 |
128 |
129 | fn sha256_encode(byte_view: UnsafePointer[UInt8], length: Int) -> InlinedFixedVector[UInt8, 32]:
130 |     """Compute the SHA-256 digest of `length` bytes starting at `byte_view`.
131 |
132 |     Args:
133 |         byte_view: Pointer to the first byte of the message.
134 |         length: Number of message bytes to hash.
135 |
136 |     Returns:
137 |         The 32 digest bytes, each state word serialized most significant byte first.
138 |     """
139 |     var state = h
140 |
141 |     var exact_chunks = length // 64
142 |     var remainder_start = exact_chunks * 64
143 |     var remainder_length = length % 64
144 |
145 |     # Tail buffer: leftover bytes, the 0x80 marker, zero padding, then the 64-bit bit length.
146 |     var extra_space = InlinedFixedVector[UInt8, 128](128)
147 |     for i in range(remainder_length):
148 |         extra_space.append(byte_view[remainder_start + i])
149 |     var one_bit: UInt8 = 0b1000_0000
150 |     extra_space.append(one_bit)
151 |
152 |     # The marker and the length field need 9 bytes; when they do not fit after the
153 |     # leftover bytes, the tail is padded out to two chunks instead of one.
154 |     var padded_size = 64
155 |     if remainder_length + 9 > 64:
156 |         padded_size = 128
157 |     while 8 + extra_space.current_size < padded_size:
158 |         extra_space.append(0)
159 |     var tail_bytes = big_endian_qword_to_bytes(length * 8)
160 |     for i in range(8):
161 |         extra_space.append(tail_bytes[i])
162 |
163 |     var w = InlinedFixedVector[UInt32, 64](64)
164 |     # (The initial values in w[0..63] don't matter, so many implementations zero them here)
165 |     for _ in range(64):
166 |         w.append(0)
167 |
168 |     # Process the full 64-byte chunks straight from the byte view.
169 |     for chunk_number in range(exact_chunks):
170 |         var chunk_start = 64 * chunk_number
171 |         # copy chunk into first 16 words w[0..15] of the message schedule array
172 |         @parameter
173 |         for dword_i in range(16):
174 |             var i = chunk_start + dword_i * 4
175 |             w[dword_i] = big_endian_bytes_to_dword(
176 |                 byte_view[i], byte_view[i + 1], byte_view[i + 2], byte_view[i + 3]
177 |             )
178 |         _sha256_compress(w, state)
179 |
180 |     # Continue through the padded tail (one or two chunks).
181 |     for chunk_number in range(extra_space.current_size // 64):
182 |         var chunk_start = 64 * chunk_number
183 |         @parameter
184 |         for dword_i in range(16):
185 |             var i = chunk_start + dword_i * 4
186 |             w[dword_i] = big_endian_bytes_to_dword(
187 |                 extra_space[i], extra_space[i + 1], extra_space[i + 2], extra_space[i + 3]
188 |             )
189 |         _sha256_compress(w, state)
190 |
191 |     var output = InlinedFixedVector[UInt8, 32](32)
192 |     for word_i in range(8):
193 |         var digest_part = big_endian_dword_to_bytes(state[word_i])
194 |         for i in range(4):
195 |             output.append(digest_part[i])
196 |     return output
197 |
--------------------------------------------------------------------------------
/test_sha256_2.mojo:
--------------------------------------------------------------------------------
1 | # from memory import memcpy
2 | # from collections.vector import InlinedFixedVector
3 | # import time
4 |
5 |
6 | # fn main():
7 | # var bytes = 1024 * 1024 * 256 + 78
8 | # var bytes_to_hash: List[UInt8] = kinda_random_bytes(bytes)
9 | # var ptr = bytes_to_hash.unsafe_ptr()
10 | # var buffer = Buffer[DType.uint8](ptr, bytes_to_hash.size)
11 | # var before = time.now()
12 | # var hash = sha256(buffer)
13 | # var after = time.now()
14 | # var keep_vector_alive = bytes_to_hash[4]
15 | # var ns = after - before
16 | # var seconds = ns / 1_000_000_000
17 | # var megabytes = bytes / 1_000_000
18 | # for i in range(hash.size):
19 | # print(hash[i])
20 | # print("megabytes per second")
21 | # print(megabytes / seconds)
22 |
23 |
24 | # fn kinda_random_bytes(length: Int) -> DynamicVector[UInt8]:
25 | # var vec = DynamicVector[UInt8](capacity=length)
26 | # var n: UInt8 = 245
27 | # var cycle: UInt8 = 1
28 | # for i in range(length):
29 | # var shifted = n >> 3
30 | # var shiftalso = n << 4
31 | # var more = shifted ^ n ^ shiftalso
32 | # var next = n + more
33 | # n = next
34 | # cycle ^= n
35 | # vec.append(n + cycle)
36 |
37 | # return vec
38 |
39 |
40 | # @always_inline
41 | # fn big_endian_bytes_to_dword(
42 | # first: UInt8, second: UInt8, third: UInt8, fourth: UInt8
43 | # ) -> UInt32:
44 | # var a = first.cast[DType.uint32]() << 24
45 | # var b = second.cast[DType.uint32]() << 16
46 | # var c = third.cast[DType.uint32]() << 8
47 | # var d = fourth.cast[DType.uint32]() << 0
48 | # return a | b | c | d
49 |
50 |
51 | # @always_inline
52 | # fn big_endian_dword_to_bytes(word: UInt32) -> InlinedFixedVector[UInt8, 4]:
53 | # var v = InlinedFixedVector[UInt8, 4](4)
54 | # var a = (word >> 24) & 255
55 | # var b = (word >> 16) & 255
56 | # var c = (word >> 8) & 255
57 | # var d = word & 255
58 | # v.append(a.cast[DType.uint8]())
59 | # v.append(b.cast[DType.uint8]())
60 | # v.append(c.cast[DType.uint8]())
61 | # v.append(d.cast[DType.uint8]())
62 | # return v
63 |
64 |
65 | # @always_inline
66 | # fn big_endian_qword_to_bytes(word: UInt64) -> InlinedFixedVector[UInt8, 8]:
67 | # var v = InlinedFixedVector[UInt8, 8](8)
68 | # var a = (word >> 56) & 255
69 | # var b = (word >> 48) & 255
70 | # var c = (word >> 40) & 255
71 | # var d = (word >> 32) & 255
72 | # var e = (word >> 24) & 255
73 | # var f = (word >> 16) & 255
74 | # var g = (word >> 8) & 255
75 | # var h = word & 255
76 | # v.append(a.cast[DType.uint8]())
77 | # v.append(b.cast[DType.uint8]())
78 | # v.append(c.cast[DType.uint8]())
79 | # v.append(d.cast[DType.uint8]())
80 | # v.append(e.cast[DType.uint8]())
81 | # v.append(f.cast[DType.uint8]())
82 | # v.append(g.cast[DType.uint8]())
83 | # v.append(h.cast[DType.uint8]())
84 | # return v
85 |
86 |
87 | # # bit rotate right
88 | # @always_inline
89 | # fn bitrr(integer: UInt32, rotations: UInt32) -> UInt32:
90 | # return (integer >> rotations) | (integer << (32 - rotations))
91 |
92 |
93 | # # for reference see https://en.wikipedia.org/wiki/SHA-2#Pseudocode
94 | # # right now it internally copies the byte_view into a dynamic vector and works on that
95 | # # this is slow, but i don't have the mojo mojo to chunk it out for zero-copy
96 | # fn sha256(byte_view: Buffer[_, DType.uint8, 0]) -> InlinedFixedVector[UInt8, 32]:
97 | # var k = InlinedFixedVector[UInt32, 64](64)
98 | # k.append(0x428A2F98)
99 | # k.append(0x71374491)
100 | # k.append(0xB5C0FBCF)
101 | # k.append(0xE9B5DBA5)
102 | # k.append(0x3956C25B)
103 | # k.append(0x59F111F1)
104 | # k.append(0x923F82A4)
105 | # k.append(0xAB1C5ED5)
106 | # k.append(0xD807AA98)
107 | # k.append(0x12835B01)
108 | # k.append(0x243185BE)
109 | # k.append(0x550C7DC3)
110 | # k.append(0x72BE5D74)
111 | # k.append(0x80DEB1FE)
112 | # k.append(0x9BDC06A7)
113 | # k.append(0xC19BF174)
114 | # k.append(0xE49B69C1)
115 | # k.append(0xEFBE4786)
116 | # k.append(0x0FC19DC6)
117 | # k.append(0x240CA1CC)
118 | # k.append(0x2DE92C6F)
119 | # k.append(0x4A7484AA)
120 | # k.append(0x5CB0A9DC)
121 | # k.append(0x76F988DA)
122 | # k.append(0x983E5152)
123 | # k.append(0xA831C66D)
124 | # k.append(0xB00327C8)
125 | # k.append(0xBF597FC7)
126 | # k.append(0xC6E00BF3)
127 | # k.append(0xD5A79147)
128 | # k.append(0x06CA6351)
129 | # k.append(0x14292967)
130 | # k.append(0x27B70A85)
131 | # k.append(0x2E1B2138)
132 | # k.append(0x4D2C6DFC)
133 | # k.append(0x53380D13)
134 | # k.append(0x650A7354)
135 | # k.append(0x766A0ABB)
136 | # k.append(0x81C2C92E)
137 | # k.append(0x92722C85)
138 | # k.append(0xA2BFE8A1)
139 | # k.append(0xA81A664B)
140 | # k.append(0xC24B8B70)
141 | # k.append(0xC76C51A3)
142 | # k.append(0xD192E819)
143 | # k.append(0xD6990624)
144 | # k.append(0xF40E3585)
145 | # k.append(0x106AA070)
146 | # k.append(0x19A4C116)
147 | # k.append(0x1E376C08)
148 | # k.append(0x2748774C)
149 | # k.append(0x34B0BCB5)
150 | # k.append(0x391C0CB3)
151 | # k.append(0x4ED8AA4A)
152 | # k.append(0x5B9CCA4F)
153 | # k.append(0x682E6FF3)
154 | # k.append(0x748F82EE)
155 | # k.append(0x78A5636F)
156 | # k.append(0x84C87814)
157 | # k.append(0x8CC70208)
158 | # k.append(0x90BEFFFA)
159 | # k.append(0xA4506CEB)
160 | # k.append(0xBEF9A3F7)
161 | # k.append(0xC67178F2)
162 |
163 | # var h0: UInt32 = 0x6A09E667
164 | # var h1: UInt32 = 0xBB67AE85
165 | # var h2: UInt32 = 0x3C6EF372
166 | # var h3: UInt32 = 0xA54FF53A
167 | # var h4: UInt32 = 0x510E527F
168 | # var h5: UInt32 = 0x9B05688C
169 | # var h6: UInt32 = 0x1F83D9AB
170 | # var h7: UInt32 = 0x5BE0CD19
171 |
172 | # var one_bit: UInt8 = 0b1000_0000
173 |
174 | # var exact_chunks = byte_view.dynamic_size // 64
175 | # var remainder_start = exact_chunks * 64
176 | # var remainder_length = byte_view.dynamic_size % 64
177 | # var bare_min_extra_bytes = remainder_length + 9
178 | # var extra_space = InlinedFixedVector[UInt8,128](128)
179 | # for i in range(remainder_length):
180 | # extra_space.append(byte_view[remainder_start + i])
181 |
182 | # extra_space.append(one_bit)
183 | # var only_one_chunk_needed = bare_min_extra_bytes <= 64
184 | # var tail_bytes = big_endian_qword_to_bytes(byte_view.dynamic_size * 8)
185 | # if only_one_chunk_needed:
186 | # while 8+extra_space.current_size < 64:
187 | # extra_space.append(0)
188 | # else:
189 | # while 8+extra_space.current_size < 128:
190 | # extra_space.append(0)
191 |
192 | # for i in range(8):
193 | # extra_space.append(tail_bytes[i])
194 |
195 |
196 | # var w = InlinedFixedVector[UInt32, 64](64)
197 | # # (The initial values in w[0..63] don't matter, so many implementations zero them here)
198 | # for i in range(64):
199 | # w.append(0)
200 |
201 |
202 | # # loop through the full sets of 64 from the byte view
203 | # # later, a little code duplication to repeat on the extra space
204 | # for chunk_number in range(exact_chunks):
205 | # # create a 64-entry message schedule array w[0..63] of 32-bit words
206 |
207 | # # copy chunk into first 16 words w[0..15] of the message schedule array
208 | # @unroll
209 | # for dword_i in range(16):
210 | # var start_byte_within_chunk = dword_i * 4
211 | # var start_byte_overall = start_byte_within_chunk + (64 * chunk_number)
212 | # var i = start_byte_overall
213 | # var dword = big_endian_bytes_to_dword(
214 | # byte_view[i],
215 | # byte_view[i + 1],
216 | # byte_view[i + 2],
217 | # byte_view[i + 3],
218 | # )
219 | # w[dword_i] = dword
220 |
221 | # # Extend the first 16 words into the remaining 48 words w[16..63] of the message schedule array:
222 | # @unroll
223 | # for i in range(16, 64):
224 | # # s0 := (w[i-15] rightrotate 7) xor (w[i-15] rightrotate 18) xor (w[i-15] rightshift 3)
225 | # var s0 = bitrr(w[i - 15], 7) ^ bitrr(w[i - 15], 18) ^ (w[i - 15] >> 3)
226 | # # s1 := (w[i-2] rightrotate 17) xor (w[i-2] rightrotate 19) xor (w[i-2] rightshift 10)
227 | # var s1 = bitrr(w[i - 2], 17) ^ bitrr(w[i - 2], 19) ^ (w[i - 2] >> 10)
228 | # # w[i] := w[i-16] + s0 + w[i-7] + s1
229 | # w[i] = w[i - 16] + s0 + w[i - 7] + s1
230 |
231 | # var a = h0
232 | # var b = h1
233 | # var c = h2
234 | # var d = h3
235 | # var e = h4
236 | # var f = h5
237 | # var g = h6
238 | # var h = h7
239 |
240 | # @unroll
241 | # for i in range(64):
242 | # # S1 := (e rightrotate 6) xor (e rightrotate 11) xor (e rightrotate 25)
243 | # var S1 = bitrr(e, 6) ^ bitrr(e, 11) ^ bitrr(e, 25)
244 | # # ch := (e and f) xor ((not e) and g)
245 | # var ch = (e & f) ^ ((e ^ (0-1)) & g)
246 | # # temp1 := h + S1 + ch + k[i] + w[i]
247 | # var temp1 = h + S1 + ch + k[i] + w[i]
248 | # # S0 := (a rightrotate 2) xor (a rightrotate 13) xor (a rightrotate 22)
249 | # var S0 = bitrr(a, 2) ^ bitrr(a, 13) ^ bitrr(a, 22)
250 | # # maj := (a and b) xor (a and c) xor (b and c)
251 | # var maj = (a & b) ^ (a & c) ^ (b & c)
252 | # # temp2 := S0 + maj
253 | # var temp2 = S0 + maj
254 |
255 | # h = g
256 | # g = f
257 | # f = e
258 | # e = d + temp1
259 | # d = c
260 | # c = b
261 | # b = a
262 | # a = temp1 + temp2
263 |
264 | # h0 = h0 + a
265 | # h1 = h1 + b
266 | # h2 = h2 + c
267 | # h3 = h3 + d
268 | # h4 = h4 + e
269 | # h5 = h5 + f
270 | # h6 = h6 + g
271 | # h7 = h7 + h
272 |
273 | # #continue through the extra space
274 | # var extra_chunks = extra_space.current_size // 64
275 | # for chunk_number in range(extra_chunks):
276 | # # create a 64-entry message schedule array w[0..63] of 32-bit words
277 |
278 | # # copy chunk into first 16 words w[0..15] of the message schedule array
279 | # @unroll
280 | # for dword_i in range(16):
281 | # var start_byte_within_chunk = dword_i * 4
282 | # var start_byte_overall = start_byte_within_chunk + (64 * chunk_number)
283 | # var i = start_byte_overall
284 | # var dword = big_endian_bytes_to_dword(
285 | # extra_space[i],
286 | # extra_space[i + 1],
287 | # extra_space[i + 2],
288 | # extra_space[i + 3],
289 | # )
290 | # w[dword_i] = dword
291 |
292 | # # Extend the first 16 words into the remaining 48 words w[16..63] of the message schedule array:
293 | # @unroll
294 | # for i in range(16, 64):
295 | # # s0 := (w[i-15] rightrotate 7) xor (w[i-15] rightrotate 18) xor (w[i-15] rightshift 3)
296 | # var s0 = bitrr(w[i - 15], 7) ^ bitrr(w[i - 15], 18) ^ (w[i - 15] >> 3)
297 | # # s1 := (w[i-2] rightrotate 17) xor (w[i-2] rightrotate 19) xor (w[i-2] rightshift 10)
298 | # var s1 = bitrr(w[i - 2], 17) ^ bitrr(w[i - 2], 19) ^ (w[i - 2] >> 10)
299 | # # w[i] := w[i-16] + s0 + w[i-7] + s1
300 | # w[i] = w[i - 16] + s0 + w[i - 7] + s1
301 |
302 | # var a = h0
303 | # var b = h1
304 | # var c = h2
305 | # var d = h3
306 | # var e = h4
307 | # var f = h5
308 | # var g = h6
309 | # var h = h7
310 |
311 | # @unroll
312 | # for i in range(64):
313 | # # S1 := (e rightrotate 6) xor (e rightrotate 11) xor (e rightrotate 25)
314 | # var S1 = bitrr(e, 6) ^ bitrr(e, 11) ^ bitrr(e, 25)
315 | # # ch := (e and f) xor ((not e) and g)
316 | # var ch = (e & f) ^ ((e ^ (0-1)) & g)
317 | # # temp1 := h + S1 + ch + k[i] + w[i]
318 | # var temp1 = h + S1 + ch + k[i] + w[i]
319 | # # S0 := (a rightrotate 2) xor (a rightrotate 13) xor (a rightrotate 22)
320 | # var S0 = bitrr(a, 2) ^ bitrr(a, 13) ^ bitrr(a, 22)
321 | # # maj := (a and b) xor (a and c) xor (b and c)
322 | # var maj = (a & b) ^ (a & c) ^ (b & c)
323 | # # temp2 := S0 + maj
324 | # var temp2 = S0 + maj
325 |
326 | # h = g
327 | # g = f
328 | # f = e
329 | # e = d + temp1
330 | # d = c
331 | # c = b
332 | # b = a
333 | # a = temp1 + temp2
334 |
335 | # h0 = h0 + a
336 | # h1 = h1 + b
337 | # h2 = h2 + c
338 | # h3 = h3 + d
339 | # h4 = h4 + e
340 | # h5 = h5 + f
341 | # h6 = h6 + g
342 | # h7 = h7 + h
343 |
344 |
345 |
346 | # var output = InlinedFixedVector[UInt8, 32](32)
347 |
348 | # var digest_part_h0 = big_endian_dword_to_bytes(h0)
349 | # for i in range(4):
350 | # output.append(digest_part_h0[i])
351 | # var digest_part_h1 = big_endian_dword_to_bytes(h1)
352 | # for i in range(4):
353 | # output.append(digest_part_h1[i])
354 | # var digest_part_h2 = big_endian_dword_to_bytes(h2)
355 | # for i in range(4):
356 | # output.append(digest_part_h2[i])
357 | # var digest_part_h3 = big_endian_dword_to_bytes(h3)
358 | # for i in range(4):
359 | # output.append(digest_part_h3[i])
360 | # var digest_part_h4 = big_endian_dword_to_bytes(h4)
361 | # for i in range(4):
362 | # output.append(digest_part_h4[i])
363 | # var digest_part_h5 = big_endian_dword_to_bytes(h5)
364 | # for i in range(4):
365 | # output.append(digest_part_h5[i])
366 | # var digest_part_h6 = big_endian_dword_to_bytes(h6)
367 | # for i in range(4):
368 | # output.append(digest_part_h6[i])
369 | # var digest_part_h7 = big_endian_dword_to_bytes(h7)
370 | # for i in range(4):
371 | # output.append(digest_part_h7[i])
372 |
373 | # return output
374 |
--------------------------------------------------------------------------------
/benchmark_other_languages/c/hash_functions/wyhash.h:
--------------------------------------------------------------------------------
1 | // This is free and unencumbered software released into the public domain under The Unlicense (http://unlicense.org/)
2 | // main repo: https://github.com/wangyi-fudan/wyhash
3 | // author: 王一 Wang Yi
4 | // contributors: Reini Urban, Dietrich Epp, Joshua Haberman, Tommy Ettinger, Daniel Lemire, Otmar Ertl, cocowalla, leo-yuriev, Diego Barrios Romero, paulie-g, dumblob, Yann Collet, ivte-ms, hyb, James Z.M. Gao, easyaspi314 (Devin), TheOneric
5 |
6 | /* quick example:
7 | string s="fjsakfdsjkf";
8 | uint64_t hash=wyhash(s.c_str(), s.size(), 0, _wyp);
9 | */
10 |
11 | #ifndef wyhash_final_version_4_2
12 | #define wyhash_final_version_4_2
13 |
14 | #ifndef WYHASH_CONDOM
15 | //protections that produce different results:
16 | //1: normal valid behavior
17 | //2: extra protection against entropy loss (probability=2^-63), aka. "blind multiplication"
18 | #define WYHASH_CONDOM 1
19 | #endif
20 |
21 | #ifndef WYHASH_32BIT_MUM
22 | //0: normal version, slow on 32 bit systems
23 | //1: faster on 32 bit systems but produces different results, incompatible with wy2u0k function
24 | #define WYHASH_32BIT_MUM 0
25 | #endif
26 |
27 | //includes
28 | #include <stdint.h>
29 | #include <string.h>
30 | #if defined(_MSC_VER) && defined(_M_X64)
31 |   #include <intrin.h>
32 |   #pragma intrinsic(_umul128)
33 | #endif
34 |
35 | //likely and unlikely macros
36 | #if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
37 | #define _likely_(x) __builtin_expect(x,1)
38 | #define _unlikely_(x) __builtin_expect(x,0)
39 | #else
40 | #define _likely_(x) (x)
41 | #define _unlikely_(x) (x)
42 | #endif
43 |
44 | //128bit multiply function: with the default macros *A receives the low and *B the high 64 bits of (*A)*(*B); with WYHASH_CONDOM>1 the results are XOR-ed into the inputs instead
45 | static inline uint64_t _wyrot(uint64_t x) { return (x>>32)|(x<<32); }
46 | static inline void _wymum(uint64_t *A, uint64_t *B){
47 | #if(WYHASH_32BIT_MUM)
48 |   uint64_t hh=(*A>>32)*(*B>>32), hl=(*A>>32)*(uint32_t)*B, lh=(uint32_t)*A*(*B>>32), ll=(uint64_t)(uint32_t)*A*(uint32_t)*B;
49 |   #if(WYHASH_CONDOM>1)
50 |   *A^=_wyrot(hl)^hh; *B^=_wyrot(lh)^ll;
51 |   #else
52 |   *A=_wyrot(hl)^hh; *B=_wyrot(lh)^ll;
53 |   #endif
54 | #elif defined(__SIZEOF_INT128__)
55 |   __uint128_t r=*A; r*=*B;
56 |   #if(WYHASH_CONDOM>1)
57 |   *A^=(uint64_t)r; *B^=(uint64_t)(r>>64);
58 |   #else
59 |   *A=(uint64_t)r; *B=(uint64_t)(r>>64);
60 |   #endif
61 | #elif defined(_MSC_VER) && defined(_M_X64)
62 |   #if(WYHASH_CONDOM>1)
63 |   uint64_t a, b;
64 |   a=_umul128(*A,*B,&b);
65 |   *A^=a; *B^=b;
66 |   #else
67 |   *A=_umul128(*A,*B,B);
68 |   #endif
69 | #else
70 |   uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo;
71 |   uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t<rl; //c collects the carries out of the low 64 bits
72 |   lo=t+(rm1<<32); c+=lo<t; hi=rh+(rm0>>32)+(rm1>>32)+c;
73 |   #if(WYHASH_CONDOM>1)
74 |   *A^=lo; *B^=hi;
75 |   #else
76 |   *A=lo; *B=hi;
77 |   #endif
78 | #endif
79 | }
80 |
81 | //multiply and xor mix function, aka MUM: runs _wymum on local copies of A and B and returns the XOR of the two results
82 | static inline uint64_t _wymix(uint64_t A, uint64_t B){ _wymum(&A,&B); return A^B; }
83 |
84 | //endian macros
85 | #ifndef WYHASH_LITTLE_ENDIAN
86 | #if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
87 | #define WYHASH_LITTLE_ENDIAN 1
88 | #elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
89 | #define WYHASH_LITTLE_ENDIAN 0
90 | #else
91 | #warning could not determine endianness! Falling back to little endian.
92 | #define WYHASH_LITTLE_ENDIAN 1
93 | #endif
94 | #endif
95 |
96 | //read functions: _wyr8/_wyr4 copy 8/4 bytes from p with memcpy and byte-swap the value when WYHASH_LITTLE_ENDIAN is 0; _wyr3 combines the bytes p[0], p[k>>1] and p[k-1]
97 | #if (WYHASH_LITTLE_ENDIAN)
98 | static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v;}
99 | static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return v;}
100 | #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
101 | static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v);}
102 | static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return __builtin_bswap32(v);}
103 | #elif defined(_MSC_VER)
104 | static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);}
105 | static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return _byteswap_ulong(v);}
106 | #else
107 | static inline uint64_t _wyr8(const uint8_t *p) {
108 | uint64_t v; memcpy(&v, p, 8);
109 | return (((v >> 56) & 0xff)| ((v >> 40) & 0xff00)| ((v >> 24) & 0xff0000)| ((v >> 8) & 0xff000000)| ((v << 8) & 0xff00000000)| ((v << 24) & 0xff0000000000)| ((v << 40) & 0xff000000000000)| ((v << 56) & 0xff00000000000000));
110 | }
111 | static inline uint64_t _wyr4(const uint8_t *p) {
112 | uint32_t v; memcpy(&v, p, 4);
113 | return (((v >> 24) & 0xff)| ((v >> 8) & 0xff00)| ((v << 8) & 0xff0000)| ((v << 24) & 0xff000000));
114 | }
115 | #endif
116 | static inline uint64_t _wyr3(const uint8_t *p, size_t k) { return (((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1];}
117 | //wyhash main function: hashes len bytes at key using seed and secret[0..3]; inputs of at most 16 bytes take a short path, longer inputs are consumed 48 bytes and then 16 bytes at a time
118 | static inline uint64_t wyhash(const void *key, size_t len, uint64_t seed, const uint64_t *secret){
119 | const uint8_t *p=(const uint8_t *)key; seed^=_wymix(seed^secret[0],secret[1]); uint64_t a, b;
120 | if(_likely_(len<=16)){ //short input: a and b are built directly from a few small reads
121 | if(_likely_(len>=4)){ a=(_wyr4(p)<<32)|_wyr4(p+((len>>3)<<2)); b=(_wyr4(p+len-4)<<32)|_wyr4(p+len-4-((len>>3)<<2)); }
122 | else if(_likely_(len>0)){ a=_wyr3(p,len); b=0;}
123 | else a=b=0;
124 | }
125 | else{
126 | size_t i=len; //i counts the bytes not yet consumed
127 | if(_unlikely_(i>=48)){ //three accumulators (seed, see1, see2) each absorb 16 of every 48 bytes
128 | uint64_t see1=seed, see2=seed;
129 | do{
130 | seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed);
131 | see1=_wymix(_wyr8(p+16)^secret[2],_wyr8(p+24)^see1);
132 | see2=_wymix(_wyr8(p+32)^secret[3],_wyr8(p+40)^see2);
133 | p+=48; i-=48;
134 | }while(_likely_(i>=48));
135 | seed^=see1^see2;
136 | }
137 | while(_unlikely_(i>16)){ seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed); i-=16; p+=16; }
138 | a=_wyr8(p+i-16); b=_wyr8(p+i-8); //the last 16 bytes of the input; may overlap bytes already consumed above
139 | }
140 | a^=secret[1]; b^=seed; _wymum(&a,&b);
141 | return _wymix(a^secret[0]^len,b^secret[1]);
142 | }
143 |
144 | //the default secret parameters
145 | static const uint64_t _wyp[4] = {0x2d358dccaa6c78a5ull, 0x8bb84b93962eacc9ull, 0x4b33a62ed433d4a3ull, 0x4d5a2da51de1aa47ull};
146 |
147 | //64bit-64bit mix function for deterministic pseudo random numbers (stated upstream to pass BigCrush and PractRand); both inputs are XOR-ed with fixed constants before and after one _wymum step
148 | static inline uint64_t wyhash64(uint64_t A, uint64_t B){ const uint64_t s0=0x2d358dccaa6c78a5ull, s1=0x8bb84b93962eacc9ull; uint64_t lo=A^s0, hi=B^s1; _wymum(&lo,&hi); return _wymix(lo^s0,hi^s1); }
149 |
150 | //The wyrand PRNG (stated upstream to pass BigCrush and PractRand): advances *seed by a fixed increment and mixes the new state
151 | static inline uint64_t wyrand(uint64_t *seed){ const uint64_t s=*seed+0x2d358dccaa6c78a5ull; *seed=s; return _wymix(s,s^0x8bb84b93962eacc9ull); }
152 |
153 | //map any 64 bit pseudo random number to a double in [0,1) using its top 52 bits. It can be combined with wyrand, wyhash64 or wyhash.
154 | static inline double wy2u01(uint64_t r){ const uint64_t top52=r>>12; return top52*(1.0/(1ull<<52)); }
155 |
156 | //map any 64 bit pseudo random number to an APPROXIMATE Gaussian value: three 21-bit fields are summed, scaled by 2^-20 and shifted by -3. It can be combined with wyrand, wyhash64 or wyhash.
157 | static inline double wy2gau(uint64_t r){ const uint64_t m=0x1fffff; const uint64_t sum=(r&m)+((r>>21)&m)+((r>>42)&m); return sum*(1.0/(1ull<<20))-3.0; }
158 |
159 | #ifdef WYTRNG
160 | #include <sys/time.h>
161 | //The wytrand true random number generator (stated upstream to pass BigCrush): mixes the current time of day into *seed.
162 | static inline uint64_t wytrand(uint64_t *seed){
163 |   struct timeval t; gettimeofday(&t,0); //the wall-clock time is the external input
164 |   uint64_t teed=(((uint64_t)t.tv_sec)<<32)|t.tv_usec;
165 |   teed=_wymix(teed^_wyp[0],*seed^_wyp[1]);
166 |   *seed=_wymix(teed^_wyp[0],_wyp[2]);
167 |   return _wymix(*seed,*seed^_wyp[3]);
168 | }
169 | #endif
170 |
171 | #if(!WYHASH_32BIT_MUM)
172 | //fast range integer random number generation on [0,k) via _wymum(r,k), credit to Daniel Lemire. May not work when WYHASH_32BIT_MUM=1. It can be combined with wyrand, wyhash64 or wyhash.
173 | static inline uint64_t wy2u0k(uint64_t r, uint64_t k){ uint64_t lo=r, hi=k; _wymum(&lo,&hi); return hi; }
174 | #endif
175 |
176 | // modified from https://github.com/going-digital/Prime64 -- mul_mod returns (a*b) mod m without needing a 128-bit product
177 | static inline unsigned long long mul_mod(unsigned long long a, unsigned long long b, unsigned long long m) {
178 |   unsigned long long r=0; a%=m; //the wrap-around corrections below are only exact while a<m and r<m
179 |   while (b) { //double-and-add over the bits of b, least significant first
180 |     if (b & 1) {
181 |       unsigned long long r2 = r + a;
182 |       if (r2 < r) r2 -= m; //r+a wrapped past 2^64: subtracting m (mod 2^64) yields the exact r+a-m
183 |       r = r2 % m;
184 |     }
185 |     b >>= 1;
186 |     if (b) {
187 |       unsigned long long a2 = a + a;
188 |       if (a2 < a) a2 -= m; //same wrap-around correction for the doubling step
189 |       a = a2 % m;
190 |     }
191 |   }
192 |   return r;
193 | }
194 | static inline unsigned long long pow_mod(unsigned long long a, unsigned long long b, unsigned long long m) {
195 |   unsigned long long acc=1; //running product of the powers of a selected by the bits of b seen so far
196 |   //square-and-multiply over the bits of b, least significant first
197 |   for (; b; b>>=1) {
198 |     if (b&1) acc=mul_mod(acc,a,m);
199 |     if (b>>1) a=mul_mod(a,a,m); //skip the final squaring, whose result would go unused
200 |   }
201 |   return acc;
202 | }
203 | static inline unsigned sprp(unsigned long long n, unsigned long long a) { //strong probable-prime test of n to base a; static inline so the header can be included from several translation units
204 |   unsigned long long d=n-1;
205 |   unsigned char s=0;
206 |   while (!(d & 0xff)) { d>>=8; s+=8; }
207 |   if (!(d & 0xf)) { d>>=4; s+=4; }
208 |   if (!(d & 0x3)) { d>>=2; s+=2; }
209 |   if (!(d & 0x1)) { d>>=1; s+=1; }
210 |   unsigned long long b=pow_mod(a,d,n);
211 |   if ((b==1) || (b==(n-1))) return 1;
212 |   unsigned char r;
213 |   for (r=1; r<s; r++) {
214 |     b=mul_mod(b,b,n);
215 |     if (b<=1) return 0;
216 |     if (b==(n-1)) return 1;
217 |   }
218 |   return 0;
219 | }
220 | static inline unsigned is_prime(unsigned long long n) {
221 |   if (n<2||!(n&1)) return 0;
222 |   if (n<4) return 1;
223 |   if (!sprp(n,2)) return 0;
224 |   if (n<2047) return 1;
225 |   if (!sprp(n,3)) return 0;
226 |   if (!sprp(n,5)) return 0;
227 |   if (!sprp(n,7)) return 0;
228 |   if (!sprp(n,11)) return 0;
229 |   if (!sprp(n,13)) return 0;
230 |   if (!sprp(n,17)) return 0;
231 |   if (!sprp(n,19)) return 0;
232 |   if (!sprp(n,23)) return 0;
233 |   if (!sprp(n,29)) return 0;
234 |   if (!sprp(n,31)) return 0;
235 |   if (!sprp(n,37)) return 0;
236 |   return 1;
237 | }
238 | //make your own secret: fills secret[0..3] with odd primes built from bytes that each have four bits set, any two differing in exactly 32 bits
239 | static inline void make_secret(uint64_t seed, uint64_t *secret){
240 |   uint8_t c[] = {15, 23, 27, 29, 30, 39, 43, 45, 46, 51, 53, 54, 57, 58, 60, 71, 75, 77, 78, 83, 85, 86, 89, 90, 92, 99, 101, 102, 105, 106, 108, 113, 114, 116, 120, 135, 139, 141, 142, 147, 149, 150, 153, 154, 156, 163, 165, 166, 169, 170, 172, 177, 178, 180, 184, 195, 197, 198, 201, 202, 204, 209, 210, 212, 216, 225, 226, 228, 232, 240 };
241 |   for(size_t i=0;i<4;i++){
242 |     uint8_t ok;
243 |     do{
244 |       ok=1; secret[i]=0;
245 |       for(size_t j=0;j<64;j+=8) secret[i]|=((uint64_t)c[wyrand(&seed)%sizeof(c)])<<j;
246 |       if(secret[i]%2==0){ ok=0; continue; }
247 |       for(size_t j=0;j<i;j++) {
248 | #if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
249 |         if(__builtin_popcountll(secret[j]^secret[i])!=32){ ok=0; break; }
250 | #elif defined(_MSC_VER) && defined(_M_X64)
251 |         if(_mm_popcnt_u64(secret[j]^secret[i])!=32){ ok=0; break; }
252 | #else
253 |         // manual popcount
254 |         uint64_t x = secret[j]^secret[i];
255 |         x -= (x >> 1) & 0x5555555555555555;
256 |         x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
257 |         x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f;
258 |         x = (x * 0x0101010101010101) >> 56;
259 |         if(x!=32){ ok=0; break; }
260 | #endif
261 |       }
262 |       if(ok&&!is_prime(secret[i])) ok=0;
263 |     }while(!ok);
264 |   }
265 | }
266 |
267 | #endif
268 |
269 | /* The Unlicense
270 | This is free and unencumbered software released into the public domain.
271 |
272 | Anyone is free to copy, modify, publish, use, compile, sell, or
273 | distribute this software, either in source code form or as a compiled
274 | binary, for any purpose, commercial or non-commercial, and by any
275 | means.
276 |
277 | In jurisdictions that recognize copyright laws, the author or authors
278 | of this software dedicate any and all copyright interest in the
279 | software to the public domain. We make this dedication for the benefit
280 | of the public at large and to the detriment of our heirs and
281 | successors. We intend this dedication to be an overt act of
282 | relinquishment in perpetuity of all present and future rights to this
283 | software under copyright law.
284 |
285 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
286 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
287 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
288 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
289 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
290 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
291 | OTHER DEALINGS IN THE SOFTWARE.
292 |
293 | For more information, please refer to <http://unlicense.org/>
294 | */
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # mojo-hash
2 | A collection of hash functions implemented in Mojo.
3 |
4 | ## AHash
5 | Original repo: https://github.com/tkaitchuck/aHash
6 | Note: implements the fallback version (without AES-NI intrinsics use), uses folded multiply function without u128 support
7 |
8 | ## fnv1a
9 | Original repo: https://github.com/ziglang/zig/blob/master/lib/std/hash/fnv.zig
10 | Note: implements 32 and 64 bit variants
11 |
12 | ## fxhash
13 | Original repo: https://github.com/cbreeden/fxhash/tree/master
14 | Note: implements 32 and 64 bit variants
15 |
16 | ## Wyhash
17 | Original repo: https://github.com/wangyi-fudan/wyhash
18 | Note: `wymum` implemented as if `WYHASH_32BIT_MUM` is set and `WYHASH_CONDOM` not set. Little endian only.
19 |
20 | ## Benchmark
21 | Collects the average hash function runtime in nanoseconds based on 7 different word collections. The average runtime is computed 20 times on each word collection, and the fastest run is kept as the final result. Also shows collisions on the full 32/64 bit space and on the mod 512 (9 bit) space.
22 |
23 | ### Results
24 |
25 | CPU Specs: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz
26 |
27 | ```
28 | Corpus 1
29 | Word count 100 | unique word count 82 | min key size 2 | avg key size 5.71 | max key size 12
30 | AHash avg hash compute 18.149999999999999 | hash colision 1.0 | hash colision mod 512 1.1549295774647887
31 | Wyhash avg hash compute 17.079999999999998 | hash colision 1.0 | hash colision mod 512 1.1232876712328768
32 | fnv1a32 avg hash compute 15.08 | hash colision 1.0 | hash colision mod 512 1.1232876712328768
33 | fnv1a64 avg hash compute 16.32 | hash colision 1.0 | hash colision mod 512 1.0249999999999999
34 | fxHash32 avg hash compute 12.539999999999999 | hash colision 1.0 | hash colision mod 512 1.2238805970149254
35 | fxHash64 avg hash compute 12.56 | hash colision 1.0 | hash colision mod 512 1.1884057971014492
36 | std_Hash64 avg hash compute 213.0 | hash colision 1.0 | hash colision mod 512 1.0512820512820513
37 |
38 | Corpus 2
39 | Word count 999 | unique word count 203 | min key size 1 | avg key size 4.8058058058058055 | max key size 14
40 | AHash avg hash compute 18.263263263263262 | hash colision 1.0 | hash colision mod 512 1.2083333333333333
41 | Wyhash avg hash compute 20.11011011011011 | hash colision 1.0 | hash colision mod 512 1.2303030303030302
42 | fnv1a32 avg hash compute 17.995995995995997 | hash colision 1.0 | hash colision mod 512 1.2848101265822784
43 | fnv1a64 avg hash compute 16.079079079079079 | hash colision 1.0 | hash colision mod 512 1.2011834319526626
44 | fxHash32 avg hash compute 14.397397397397397 | hash colision 1.0 | hash colision mod 512 1.3716216216216217
45 | fxHash64 avg hash compute 12.603603603603604 | hash colision 1.0 | hash colision mod 512 1.4195804195804196
46 | std_Hash64 avg hash compute 239.15815815815816 | hash colision 1.0 | hash colision mod 512 1.2303030303030302
47 |
48 | Corpus 3
49 | Word count 999 | unique word count 192 | min key size 1 | avg key size 4.293293293293293 | max key size 13
50 | AHash avg hash compute 16.716716716716718 | hash colision 1.0 | hash colision mod 512 1.1636363636363636
51 | Wyhash avg hash compute 16.952952952952952 | hash colision 1.0 | hash colision mod 512 1.2151898734177216
52 | fnv1a32 avg hash compute 15.968968968968969 | hash colision 1.0 | hash colision mod 512 1.1428571428571428
53 | fnv1a64 avg hash compute 18.862862862862862 | hash colision 1.0 | hash colision mod 512 1.2229299363057324
54 | fxHash32 avg hash compute 15.723723723723724 | hash colision 1.0 | hash colision mod 512 1.352112676056338
55 | fxHash64 avg hash compute 17.168168168168169 | hash colision 1.0 | hash colision mod 512 1.4436090225563909
56 | std_Hash64 avg hash compute 258.6146146146146 | hash colision 1.0 | hash colision mod 512 1.1779141104294479
57 |
58 | Corpus 4
59 | Word count 999 | unique word count 532 | min key size 2 | avg key size 10.646646646646646 | max key size 37
60 | AHash avg hash compute 20.205205205205207 | hash colision 1.0 | hash colision mod 512 1.5786350148367954
61 | Wyhash avg hash compute 20.234234234234233 | hash colision 1.0 | hash colision mod 512 1.5975975975975976
62 | fnv1a32 avg hash compute 21.814814814814813 | hash colision 1.0 | hash colision mod 512 1.6170212765957446
63 | fnv1a64 avg hash compute 24.41041041041041 | hash colision 1.0 | hash colision mod 512 1.5928143712574849
64 | fxHash32 avg hash compute 16.208208208208209 | hash colision 1.0 | hash colision mod 512 1.6677115987460815
65 | fxHash64 avg hash compute 15.890890890890891 | hash colision 1.0 | hash colision mod 512 1.9850746268656716
66 | std_Hash64 avg hash compute 218.3093093093093 | hash colision 1.0018832391713748 | hash colision mod 512 1.6170212765957446
67 |
68 | Corpus 5
69 | Word count 999 | unique word count 208 | min key size 2 | avg key size 5.6496496496496498 | max key size 18
70 | AHash avg hash compute 15.921921921921921 | hash colision 1.0 | hash colision mod 512 1.1620111731843576
71 | Wyhash avg hash compute 19.517517517517518 | hash colision 1.0 | hash colision mod 512 1.1685393258426966
72 | fnv1a32 avg hash compute 17.042042042042041 | hash colision 1.0 | hash colision mod 512 1.2093023255813953
73 | fnv1a64 avg hash compute 18.58958958958959 | hash colision 1.0 | hash colision mod 512 1.2530120481927711
74 | fxHash32 avg hash compute 14.552552552552553 | hash colision 1.0 | hash colision mod 512 1.3506493506493507
75 | fxHash64 avg hash compute 14.527527527527528 | hash colision 1.0 | hash colision mod 512 1.3594771241830066
76 | std_Hash64 avg hash compute 239.1181181181181 | hash colision 1.0 | hash colision mod 512 1.2023121387283238
77 |
78 | Corpus 6
79 | Word count 10 | unique word count 10 | min key size 378 | avg key size 499.19999999999999 | max key size 558
80 | AHash avg hash compute 67.400000000000006 | hash colision 1.0 | hash colision mod 512 1.0
81 | Wyhash avg hash compute 64.200000000000003 | hash colision 1.0 | hash colision mod 512 1.0
82 | fnv1a32 avg hash compute 499.60000000000002 | hash colision 1.0 | hash colision mod 512 1.0
83 | fnv1a64 avg hash compute 620.70000000000005 | hash colision 1.0 | hash colision mod 512 1.0
84 | fxHash32 avg hash compute 163.80000000000001 | hash colision 1.0 | hash colision mod 512 1.0
85 | fxHash64 avg hash compute 87.799999999999997 | hash colision 1.0 | hash colision mod 512 1.0
86 | std_Hash64 avg hash compute 247.59999999999999 | hash colision 1.0 | hash colision mod 512 1.0
87 |
88 | Corpus 7
89 | Word count 161 | unique word count 143 | min key size 8 | avg key size 22.260869565217391 | max key size 43
90 | AHash avg hash compute 19.546583850931675 | hash colision 1.0 | hash colision mod 512 1.1259842519685039
91 | Wyhash avg hash compute 22.670807453416149 | hash colision 1.0 | hash colision mod 512 1.1439999999999999
92 | fnv1a32 avg hash compute 32.900621118012424 | hash colision 1.0 | hash colision mod 512 1.153225806451613
93 | fnv1a64 avg hash compute 38.391304347826086 | hash colision 1.0 | hash colision mod 512 1.1626016260162602
94 | fxHash32 avg hash compute 20.043478260869566 | hash colision 1.0 | hash colision mod 512 1.1259842519685039
95 | fxHash64 avg hash compute 19.503105590062113 | hash colision 1.0 | hash colision mod 512 1.153225806451613
96 | std_Hash64 avg hash compute 242.59006211180125 | hash colision 1.0 | hash colision mod 512 1.1626016260162602
97 | ```
98 |
99 | MacMini M1, 2020
100 |
101 | ```
102 | Corpus 1
103 | Word count 100 | unique word count 82 | min key size 2 | avg key size 5.71 | max key size 12
104 | AHash avg hash compute 19.0 | hash colision 1.0 | hash colision mod 512 1.1549295774647887
105 | Wyhash avg hash compute 29.5 | hash colision 1.0 | hash colision mod 512 1.1232876712328768
106 | fnv1a32 avg hash compute 18.5 | hash colision 1.0 | hash colision mod 512 1.1232876712328768
107 | fnv1a64 avg hash compute 17.5 | hash colision 1.0 | hash colision mod 512 1.0249999999999999
108 | fxHash32 avg hash compute 18.0 | hash colision 1.0 | hash colision mod 512 1.2238805970149254
109 | fxHash64 avg hash compute 19.5 | hash colision 1.0 | hash colision mod 512 1.1884057971014492
110 | std_Hash64 avg hash compute 84.5 | hash colision 1.0 | hash colision mod 512 1.0512820512820513
111 |
112 | Corpus 2
113 | Word count 999 | unique word count 203 | min key size 1 | avg key size 4.8058058058058055 | max key size 14
114 | AHash avg hash compute 17.567567567567568 | hash colision 1.0 | hash colision mod 512 1.2083333333333333
115 | Wyhash avg hash compute 25.925925925925927 | hash colision 1.0 | hash colision mod 512 1.2303030303030302
116 | fnv1a32 avg hash compute 19.96996996996997 | hash colision 1.0 | hash colision mod 512 1.2848101265822784
117 | fnv1a64 avg hash compute 17.967967967967969 | hash colision 1.0 | hash colision mod 512 1.2011834319526626
118 | fxHash32 avg hash compute 16.016016016016017 | hash colision 1.0 | hash colision mod 512 1.3716216216216217
119 | fxHash64 avg hash compute 13.863863863863864 | hash colision 1.0 | hash colision mod 512 1.4195804195804196
120 | std_Hash64 avg hash compute 75.17517517517517 | hash colision 1.0 | hash colision mod 512 1.2303030303030302
121 |
122 | Corpus 3
123 | Word count 999 | unique word count 192 | min key size 1 | avg key size 4.293293293293293 | max key size 13
124 | AHash avg hash compute 18.468468468468469 | hash colision 1.0 | hash colision mod 512 1.1636363636363636
125 | Wyhash avg hash compute 24.474474474474473 | hash colision 1.0 | hash colision mod 512 1.2151898734177216
126 | fnv1a32 avg hash compute 19.81981981981982 | hash colision 1.0 | hash colision mod 512 1.1428571428571428
127 | fnv1a64 avg hash compute 17.417417417417418 | hash colision 1.0 | hash colision mod 512 1.2229299363057324
128 | fxHash32 avg hash compute 15.665665665665665 | hash colision 1.0 | hash colision mod 512 1.352112676056338
129 | fxHash64 avg hash compute 16.216216216216218 | hash colision 1.0 | hash colision mod 512 1.4436090225563909
130 | std_Hash64 avg hash compute 87.037037037037038 | hash colision 1.0 | hash colision mod 512 1.1779141104294479
131 |
132 | Corpus 4
133 | Word count 999 | unique word count 532 | min key size 2 | avg key size 10.646646646646646 | max key size 37
134 | AHash avg hash compute 19.51951951951952 | hash colision 1.0 | hash colision mod 512 1.5786350148367954
135 | Wyhash avg hash compute 24.874874874874873 | hash colision 1.0 | hash colision mod 512 1.5975975975975976
136 | fnv1a32 avg hash compute 25.575575575575577 | hash colision 1.0 | hash colision mod 512 1.6170212765957446
137 | fnv1a64 avg hash compute 24.274274274274273 | hash colision 1.0 | hash colision mod 512 1.5928143712574849
138 | fxHash32 avg hash compute 15.665665665665665 | hash colision 1.0 | hash colision mod 512 1.6677115987460815
139 | fxHash64 avg hash compute 17.867867867867869 | hash colision 1.0 | hash colision mod 512 1.9850746268656716
140 | std_Hash64 avg hash compute 73.523523523523522 | hash colision 1.0018832391713748 | hash colision mod 512 1.5833333333333333
141 |
142 | Corpus 5
143 | Word count 999 | unique word count 208 | min key size 2 | avg key size 5.6496496496496498 | max key size 18
144 | AHash avg hash compute 17.817817817817819 | hash colision 1.0 | hash colision mod 512 1.1620111731843576
145 | Wyhash avg hash compute 26.576576576576578 | hash colision 1.0 | hash colision mod 512 1.1685393258426966
146 | fnv1a32 avg hash compute 19.76976976976977 | hash colision 1.0 | hash colision mod 512 1.2093023255813953
147 | fnv1a64 avg hash compute 18.918918918918919 | hash colision 1.0 | hash colision mod 512 1.2530120481927711
148 | fxHash32 avg hash compute 17.817817817817819 | hash colision 1.0 | hash colision mod 512 1.3506493506493507
149 | fxHash64 avg hash compute 15.715715715715715 | hash colision 1.0 | hash colision mod 512 1.3594771241830066
150 | std_Hash64 avg hash compute 75.425425425425431 | hash colision 1.0 | hash colision mod 512 1.2023121387283238
151 |
152 | Corpus 6
153 | Word count 10 | unique word count 10 | min key size 378 | avg key size 499.19999999999999 | max key size 558
154 | AHash avg hash compute 30.0 | hash colision 1.0 | hash colision mod 512 1.0
155 | Wyhash avg hash compute 90.0 | hash colision 1.0 | hash colision mod 512 1.0
156 | fnv1a32 avg hash compute 635.0 | hash colision 1.0 | hash colision mod 512 1.0
157 | fnv1a64 avg hash compute 660.0 | hash colision 1.0 | hash colision mod 512 1.0
158 | fxHash32 avg hash compute 250.0 | hash colision 1.0 | hash colision mod 512 1.0
159 | fxHash64 avg hash compute 145.0 | hash colision 1.0 | hash colision mod 512 1.0
160 | std_Hash64 avg hash compute 220.0 | hash colision 1.0 | hash colision mod 512 1.0
161 |
162 | Corpus 7
163 | Word count 161 | unique word count 143 | min key size 8 | avg key size 22.260869565217391 | max key size 43
164 | AHash avg hash compute 17.701863354037268 | hash colision 1.0 | hash colision mod 512 1.1259842519685039
165 | Wyhash avg hash compute 29.19254658385093 | hash colision 1.0 | hash colision mod 512 1.1439999999999999
166 | fnv1a32 avg hash compute 42.546583850931675 | hash colision 1.0 | hash colision mod 512 1.153225806451613
167 | fnv1a64 avg hash compute 39.440993788819874 | hash colision 1.0 | hash colision mod 512 1.1626016260162602
168 | fxHash32 avg hash compute 18.012422360248447 | hash colision 1.0 | hash colision mod 512 1.1259842519685039
169 | fxHash64 avg hash compute 19.875776397515526 | hash colision 1.0 | hash colision mod 512 1.153225806451613
170 | std_Hash64 avg hash compute 113.35403726708074 | hash colision 1.0 | hash colision mod 512 1.1259842519685039
171 | ```
172 |
173 | ![Hash functions benchmark](images/hash_functions.png)
174 |
175 | ### Benchmark results for other languages:
176 |
177 | #### CPU Specs: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz
178 |
179 | **Rust**
180 |
181 | ```
182 | Avg time Default, 16.622: total elements: 100, unique elements: 82, collisions: 1.0, collisions % 512: 1.0649350649350648, keys min: 2, avg: 6, max: 12
183 | Avg time FxHasher, 17.769000000000002: total elements: 100, unique elements: 82, collisions: 1.0, collisions % 512: 1.0933333333333333, keys min: 2, avg: 6, max: 12
184 | Avg time AHasher, 16.5035: total elements: 100, unique elements: 82, collisions: 1.0, collisions % 512: 1.1232876712328768, keys min: 2, avg: 6, max: 12
185 | Avg time WyHash, 17.2495: total elements: 100, unique elements: 82, collisions: 1.0, collisions % 512: 1.0933333333333333, keys min: 2, avg: 6, max: 12
186 | Avg time Default, 16.026176176176175: total elements: 999, unique elements: 203, collisions: 1.0, collisions % 512: 1.1871345029239766, keys min: 1, avg: 5, max: 14
187 | Avg time FxHasher, 16.14034034034034: total elements: 999, unique elements: 203, collisions: 1.0, collisions % 512: 1.2083333333333333, keys min: 1, avg: 5, max: 14
188 | Avg time AHasher, 16.455305305305306: total elements: 999, unique elements: 203, collisions: 1.0, collisions % 512: 1.2011834319526626, keys min: 1, avg: 5, max: 14
189 | Avg time WyHash, 17.575925925925926: total elements: 999, unique elements: 203, collisions: 1.0, collisions % 512: 1.2083333333333333, keys min: 1, avg: 5, max: 14
190 | Avg time Default, 16.30725725725726: total elements: 999, unique elements: 192, collisions: 1.0, collisions % 512: 1.238709677419355, keys min: 1, avg: 5, max: 13
191 | Avg time FxHasher, 17.076676676676676: total elements: 999, unique elements: 192, collisions: 1.0, collisions % 512: 1.263157894736842, keys min: 1, avg: 5, max: 13
192 | Avg time AHasher, 16.477227227227228: total elements: 999, unique elements: 192, collisions: 1.0, collisions % 512: 1.2229299363057324, keys min: 1, avg: 5, max: 13
193 | Avg time WyHash, 17.61991991991992: total elements: 999, unique elements: 192, collisions: 1.0, collisions % 512: 1.2075471698113207, keys min: 1, avg: 5, max: 13
194 | Avg time Default, 17.10630630630631: total elements: 999, unique elements: 532, collisions: 1.0, collisions % 512: 1.5647058823529412, keys min: 2, avg: 12, max: 37
195 | Avg time FxHasher, 18.07007007007007: total elements: 999, unique elements: 532, collisions: 1.0, collisions % 512: 1.6419753086419753, keys min: 2, avg: 12, max: 37
196 | Avg time AHasher, 17.31981981981982: total elements: 999, unique elements: 532, collisions: 1.0, collisions % 512: 1.636923076923077, keys min: 2, avg: 12, max: 37
197 | Avg time WyHash, 17.735835835835836: total elements: 999, unique elements: 532, collisions: 1.0, collisions % 512: 1.636923076923077, keys min: 2, avg: 12, max: 37
198 | Avg time Default, 16.716816816816817: total elements: 999, unique elements: 208, collisions: 1.0, collisions % 512: 1.2606060606060605, keys min: 2, avg: 6, max: 18
199 | Avg time FxHasher, 17.642342342342342: total elements: 999, unique elements: 208, collisions: 1.0, collisions % 512: 1.2606060606060605, keys min: 2, avg: 6, max: 18
200 | Avg time AHasher, 16.40915915915916: total elements: 999, unique elements: 208, collisions: 1.0, collisions % 512: 1.1954022988505748, keys min: 2, avg: 6, max: 18
201 | Avg time WyHash, 17.5506006006006: total elements: 999, unique elements: 208, collisions: 1.0, collisions % 512: 1.1751412429378532, keys min: 2, avg: 6, max: 18
202 | Avg time Default, 126.03: total elements: 10, unique elements: 10, collisions: 1.0, collisions % 512: 1.0, keys min: 378, avg: 499, max: 558
203 | Avg time FxHasher, 93.1: total elements: 10, unique elements: 10, collisions: 1.0, collisions % 512: 1.0, keys min: 378, avg: 499, max: 558
204 | Avg time AHasher, 48.14: total elements: 10, unique elements: 10, collisions: 1.0, collisions % 512: 1.0, keys min: 378, avg: 499, max: 558
205 | Avg time WyHash, 43.175: total elements: 10, unique elements: 10, collisions: 1.0, collisions % 512: 1.0, keys min: 378, avg: 499, max: 558
206 | Avg time Default, 22.654658385093168: total elements: 161, unique elements: 143, collisions: 1.0, collisions % 512: 1.125984251968504, keys min: 8, avg: 22, max: 43
207 | Avg time FxHasher, 20.537888198757763: total elements: 161, unique elements: 143, collisions: 1.0, collisions % 512: 1.1916666666666667, keys min: 8, avg: 22, max: 43
208 | Avg time AHasher, 17.930124223602483: total elements: 161, unique elements: 143, collisions: 1.0, collisions % 512: 1.1349206349206349, keys min: 8, avg: 22, max: 43
209 | Avg time WyHash, 19.022360248447203: total elements: 161, unique elements: 143, collisions: 1.0, collisions % 512: 1.1349206349206349, keys min: 8, avg: 22, max: 43
210 | ```
211 |
212 | #### MacMini M1, 2020
213 |
214 | **Rust**
215 |
216 | ```
217 | Avg time Default, 26.552: total elements: 100, unique elements: 82, collisions: 1.0, collisions % 512: 1.0649350649350648, keys min: 2, avg: 6, max: 12
218 | Avg time FxHasher, 25.7875: total elements: 100, unique elements: 82, collisions: 1.0, collisions % 512: 1.0933333333333333, keys min: 2, avg: 6, max: 12
219 | Avg time AHasher, 26.688499999999998: total elements: 100, unique elements: 82, collisions: 1.0, collisions % 512: 1.1549295774647887, keys min: 2, avg: 6, max: 12
220 | Avg time WyHash, 27.168499999999998: total elements: 100, unique elements: 82, collisions: 1.0, collisions % 512: 1.0933333333333333, keys min: 2, avg: 6, max: 12
221 | Avg time Default, 30.68533533533534: total elements: 999, unique elements: 203, collisions: 1.0, collisions % 512: 1.1871345029239766, keys min: 1, avg: 5, max: 14
222 | Avg time FxHasher, 32.62207207207207: total elements: 999, unique elements: 203, collisions: 1.0, collisions % 512: 1.2083333333333333, keys min: 1, avg: 5, max: 14
223 | Avg time AHasher, 30.133333333333333: total elements: 999, unique elements: 203, collisions: 1.0, collisions % 512: 1.26875, keys min: 1, avg: 5, max: 14
224 | Avg time WyHash, 30.666916916916918: total elements: 999, unique elements: 203, collisions: 1.0, collisions % 512: 1.2083333333333333, keys min: 1, avg: 5, max: 14
225 | Avg time Default, 28.71331331331331: total elements: 999, unique elements: 192, collisions: 1.0, collisions % 512: 1.238709677419355, keys min: 1, avg: 5, max: 13
226 | Avg time FxHasher, 25.97787787787788: total elements: 999, unique elements: 192, collisions: 1.0, collisions % 512: 1.263157894736842, keys min: 1, avg: 5, max: 13
227 | Avg time AHasher, 26.035535535535537: total elements: 999, unique elements: 192, collisions: 1.0, collisions % 512: 1.238709677419355, keys min: 1, avg: 5, max: 13
228 | Avg time WyHash, 26.91166166166166: total elements: 999, unique elements: 192, collisions: 1.0, collisions % 512: 1.2075471698113207, keys min: 1, avg: 5, max: 13
229 | Avg time Default, 24.716066066066066: total elements: 999, unique elements: 532, collisions: 1.0, collisions % 512: 1.5647058823529412, keys min: 2, avg: 12, max: 37
230 | Avg time FxHasher, 23.58993993993994: total elements: 999, unique elements: 532, collisions: 1.0, collisions % 512: 1.6419753086419753, keys min: 2, avg: 12, max: 37
231 | Avg time AHasher, 23.47817817817818: total elements: 999, unique elements: 532, collisions: 1.0, collisions % 512: 1.6269113149847094, keys min: 2, avg: 12, max: 37
232 | Avg time WyHash, 21.5007007007007: total elements: 999, unique elements: 532, collisions: 1.0, collisions % 512: 1.636923076923077, keys min: 2, avg: 12, max: 37
233 | Avg time Default, 21.82362362362362: total elements: 999, unique elements: 208, collisions: 1.0, collisions % 512: 1.2606060606060605, keys min: 2, avg: 6, max: 18
234 | Avg time FxHasher, 21.375575575575578: total elements: 999, unique elements: 208, collisions: 1.0, collisions % 512: 1.2606060606060605, keys min: 2, avg: 6, max: 18
235 | Avg time AHasher, 20.11911911911912: total elements: 999, unique elements: 208, collisions: 1.0, collisions % 512: 1.2455089820359282, keys min: 2, avg: 6, max: 18
236 | Avg time WyHash, 20.31836836836837: total elements: 999, unique elements: 208, collisions: 1.0, collisions % 512: 1.1751412429378532, keys min: 2, avg: 6, max: 18
237 | Avg time Default, 181.005: total elements: 10, unique elements: 10, collisions: 1.0, collisions % 512: 1.0, keys min: 378, avg: 499, max: 558
238 | Avg time FxHasher, 122.93499999999999: total elements: 10, unique elements: 10, collisions: 1.0, collisions % 512: 1.0, keys min: 378, avg: 499, max: 558
239 | Avg time AHasher, 45.04: total elements: 10, unique elements: 10, collisions: 1.0, collisions % 512: 1.0, keys min: 378, avg: 499, max: 558
240 | Avg time WyHash, 31.005000000000003: total elements: 10, unique elements: 10, collisions: 1.0, collisions % 512: 1.0, keys min: 378, avg: 499, max: 558
241 | Avg time Default, 21.469254658385093: total elements: 161, unique elements: 143, collisions: 1.0, collisions % 512: 1.125984251968504, keys min: 8, avg: 22, max: 43
242 | Avg time FxHasher, 20.225155279503106: total elements: 161, unique elements: 143, collisions: 1.0, collisions % 512: 1.1916666666666667, keys min: 8, avg: 22, max: 43
243 | Avg time AHasher, 20.106832298136645: total elements: 161, unique elements: 143, collisions: 1.0, collisions % 512: 1.125984251968504, keys min: 8, avg: 22, max: 43
244 | Avg time WyHash, 19.890993788819873: total elements: 161, unique elements: 143, collisions: 1.0, collisions % 512: 1.1349206349206349, keys min: 8, avg: 22, max: 43
245 | ```
246 |
247 | **Swift**
248 |
249 | ```
250 | Avg time: 85.945, total elements: 100, unique elements: 82, collisions: 1.0, collisions % 512: 1.1388888, keys min: 2, avg: 6, max: 12
251 | Avg time: 67.80245, total elements: 999, unique elements: 203, collisions: 1.0, collisions % 512: 1.2083334, keys min: 1, avg: 5, max: 14
252 | Avg time: 65.73403, total elements: 999, unique elements: 192, collisions: 1.0, collisions % 512: 1.2, keys min: 1, avg: 5, max: 13
253 | Avg time: 240.52744, total elements: 999, unique elements: 532, collisions: 1.0, collisions % 512: 1.4224598, keys min: 1, avg: 6, max: 19
254 | Avg time: 79.92162, total elements: 999, unique elements: 208, collisions: 1.0, collisions % 512: 1.2163743, keys min: 1, avg: 6, max: 18
255 | Avg time: 1773.4, total elements: 10, unique elements: 10, collisions: 1.0, collisions % 512: 1.1111112, keys min: 130, avg: 171, max: 192
256 | Avg time: 140.84721, total elements: 161, unique elements: 143, collisions: 1.0, collisions % 512: 1.0592593, keys min: 8, avg: 22, max: 43
257 | ```
258 |
259 | **Python**
260 |
261 | ```
262 | Avg time: 76.5, total elements: 100, unique elements: 82, collisions: 1.0, collisions % 512: 1.1232876712328768, keys min: 2, avg: 6.012195121951219, max: 12
263 | Avg time: 71.52152152152152, total elements: 999, unique elements: 203, collisions: 1.0, collisions % 512: 1.215568862275449, keys min: 1, avg: 5.862068965517241, max: 14
264 | Avg time: 81.63163163163163, total elements: 999, unique elements: 192, collisions: 1.0, collisions % 512: 1.2, keys min: 1, avg: 5.385416666666667, max: 13
265 | Avg time: 79.72972972972973, total elements: 999, unique elements: 532, collisions: 1.0, collisions % 512: 1.592814371257485, keys min: 1, avg: 6.593984962406015, max: 19
266 | Avg time: 78.62862862862863, total elements: 999, unique elements: 208, collisions: 1.0, collisions % 512: 1.2023121387283238, keys min: 1, avg: 6.394230769230769, max: 18
267 | Avg time: 70.0, total elements: 10, unique elements: 10, collisions: 1.0, collisions % 512: 1.0, keys min: 130, avg: 171.4, max: 192
268 | Avg time: 75.77639751552795, total elements: 161, unique elements: 143, collisions: 1.0, collisions % 512: 1.125984251968504, keys min: 8, avg: 22.6013986013986, max: 43
269 | ```
270 |
271 | **NodeJS**
272 |
273 | ```
274 | Avg time WyHash: 5024.798, total elements: 100, unique elements: 82, collisions: 1, collisions % 512: 1.0512820512820513, keys min: 2, avg: 6.012195121951219, max: 12
275 | Avg time xxHash: 6870.5885, total elements: 100, unique elements: 82, collisions: 1, collisions % 512: 1.0933333333333333, keys min: 2, avg: 6.012195121951219, max: 12
276 | Avg time WyHash: 3688.6434934934937, total elements: 999, unique elements: 203, collisions: 1, collisions % 512: 1.26875, keys min: 1, avg: 5.862068965517241, max: 14
277 | Avg time xxHash: 4461.4131131131135, total elements: 999, unique elements: 203, collisions: 1, collisions % 512: 1.180232558139535, keys min: 1, avg: 5.862068965517241, max: 14
278 | Avg time WyHash: 3393.779079079079, total elements: 999, unique elements: 192, collisions: 1, collisions % 512: 1.1566265060240963, keys min: 1, avg: 5.385416666666667, max: 13
279 | Avg time xxHash: 4050.333833833834, total elements: 999, unique elements: 192, collisions: 1, collisions % 512: 1.1497005988023952, keys min: 1, avg: 5.385416666666667, max: 13
280 | Avg time WyHash: 4635.906556556557, total elements: 999, unique elements: 532, collisions: 1, collisions % 512: 1.6269113149847094, keys min: 1, avg: 6.593984962406015, max: 19
281 | Avg time xxHash: 5929.1773773773775, total elements: 999, unique elements: 532, collisions: 1, collisions % 512: 1.5880597014925373, keys min: 1, avg: 6.593984962406015, max: 19
282 | Avg time WyHash: 3601.807957957958, total elements: 999, unique elements: 208, collisions: 1, collisions % 512: 1.2093023255813953, keys min: 1, avg: 6.394230769230769, max: 18
283 | Avg time xxHash: 4370.727527527527, total elements: 999, unique elements: 208, collisions: 1, collisions % 512: 1.2682926829268293, keys min: 1, avg: 6.394230769230769, max: 18
284 | Avg time WyHash: 94997.88, total elements: 10, unique elements: 10, collisions: 1, collisions % 512: 1, keys min: 130, avg: 171.4, max: 192
285 | Avg time xxHash: 48261.055, total elements: 10, unique elements: 10, collisions: 1, collisions % 512: 1, keys min: 130, avg: 171.4, max: 192
286 | Avg time WyHash: 6467.241304347826, total elements: 161, unique elements: 143, collisions: 1, collisions % 512: 1.0916030534351144, keys min: 8, avg: 22.6013986013986, max: 43
287 | Avg time xxHash: 5186.903105590062, total elements: 161, unique elements: 143, collisions: 1, collisions % 512: 1.1349206349206349, keys min: 8, avg: 22.6013986013986, max: 43
288 | ```
289 |
290 | **Go**
291 |
292 | ```
293 | Avg time: 56.000000, total elements: 100, unique elements: 82, collisions: 1.000000, collisions mod 512: 1.108108, keys min: 2, avg: 6, max: 12
294 | Avg time: 53.353353, total elements: 999, unique elements: 203, collisions: 1.000000, collisions mod 512: 1.230303, keys min: 1, avg: 5, max: 14
295 | Avg time: 63.363363, total elements: 999, unique elements: 192, collisions: 1.000000, collisions mod 512: 1.215190, keys min: 1, avg: 5, max: 13
296 | Avg time: 57.507508, total elements: 999, unique elements: 532, collisions: 1.000000, collisions mod 512: 1.588060, keys min: 2, avg: 12, max: 37
297 | Avg time: 56.156156, total elements: 999, unique elements: 208, collisions: 1.000000, collisions mod 512: 1.238095, keys min: 2, avg: 6, max: 18
298 | Avg time: 115.000000, total elements: 10, unique elements: 10, collisions: 1.000000, collisions mod 512: 1.111111, keys min: 378, avg: 499, max: 558
299 | Avg time: 59.316770, total elements: 161, unique elements: 143, collisions: 1.000000, collisions mod 512: 1.108527, keys min: 8, avg: 22, max: 43
300 | ```
301 |
302 | ## Benchmark HashMap
303 |
304 | This repository also contains a simple HashMap implementation, whose keys are of type String and whose values must conform to the CollectionElement trait.
305 |
306 | ### Results
307 |
308 | CPU Specs: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz
309 | Tested with corpus 7, which is a list of S3 actions (total count 161, unique count 143)
310 |
311 | ```
312 | AHash Avg put time 211.01180124223603
313 | AHash Avg get time 82.304968944099386
314 | WyHash Avg put time 206.67639751552795
315 | WyHash Avg get time 81.214285714285708
316 | FxHash64 Avg put time 223.24844720496895
317 | FxHash64 Avg get time 84.171428571428578
318 | StdHash Avg put time 634.18819875776398
319 | StdHash Avg get time 278.51801242236024
320 | ```
321 |
322 | MacMini M1, 2020
323 |
324 | ```
325 | AHash Avg put time 347.82608695652175
326 | AHash Avg get time 162.11180124223603
327 | WyHash Avg put time 363.35403726708074
328 | WyHash Avg get time 192.54658385093168
329 | FxHash64 Avg put time 418.63354037267078
330 | FxHash64 Avg get time 170.80745341614906
331 | StdHash Avg put time 583.22981366459624
332 | StdHash Avg get time 226.08695652173913
333 | ```
334 |
335 | ![HashMap benchmark](images/hash_map.png)
--------------------------------------------------------------------------------
/benchmark_other_languages/python/hash_functions/benchmark_dict.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from time import time_ns
3 |
4 | def main():
5 | corpus1 = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quisque orci urna, pretium et porta ac, porttitor sit amet sem. Fusce sagittis lorem neque, vitae sollicitudin elit suscipit et. In interdum convallis nisl in ornare. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Aliquam erat volutpat. Morbi mollis iaculis lectus ac tincidunt. Fusce nisi lacus, semper eu dignissim et, malesuada non mi. Sed euismod urna vel elit faucibus, eu bibendum ante fringilla. Curabitur tempus in turpis at mattis. Aliquam erat volutpat. Donec maximus elementum felis, sit amet dignissim augue tincidunt blandit. Aliquam fermentum, est eu mollis.".split(" ")
6 | corpus2 = "But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure? On the other hand, we denounce with righteous indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee the pain and trouble that are bound to ensue; and equal blame belongs to those who fail in their duty through weakness of will, which is the same as saying through shrinking from toil and pain. These cases are perfectly simple and easy to distinguish. In a free hour, when our power of choice is untrammelled and when nothing prevents our being able to do what we like best, every pleasure is to be welcomed and every pain avoided. But in certain circumstances and owing to the claims of duty or the obligations of business it will frequently occur that pleasures have to be repudiated and annoyances accepted. The wise man therefore always holds in these matters to this principle of selection: he rejects pleasures to secure other greater pleasures, or else he endures pains to avoid worse pains. 
But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure? On the other hand, we denounce with righteous indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee the pain and trouble that are bound to ensue; and equal blame belongs to those who fail in their duty through weakness of will, which is the same as saying through shrinking from toil and pain. These cases are perfectly simple and easy to distinguish. In a free hour, when our power of choice is untrammelled and when nothing prevents our being able to do what we like best, every pleasure is to be welcomed and every pain avoided. But in certain circumstances and owing to the claims of duty or the obligations of business it will frequently occur that pleasures have to be repudiated and annoyances accepted. 
The wise man therefore always holds in these matters to this principle of selection: he rejects pleasures to secure other greater pleasures, or else he endures pains to avoid worse pains.But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure? On the other hand, we denounce with righteous indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee the pain and trouble that are bound to ensue; and equal blame belongs to those who fail in their duty through weakness of will, which is the same as saying through shrinking from toil and pain. These cases are perfectly simple and easy to distinguish. In a free hour, when our power of choice is untrammelled and when nothing prevents our being able to do what we like best, every pleasure is to be welcomed and every pain avoided. But in certain circumstances and owing to the claims of duty or the obligations of business it will frequently occur that pleasures have to be repudiated and annoyances accepted. 
The wise man therefore always holds in these matters to this principle of selection:".split(" ")
7 | corpus3 = "A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls like mine. I am so happy, my dear friend, so absorbed in the exquisite sense of mere tranquil existence, that I neglect my talents. I should be incapable of drawing a single stroke at the present moment; and yet I feel that I never was a greater artist than now. When, while the lovely valley teems with vapour around me, and the meridian sun strikes the upper surface of the impenetrable foliage of my trees, and but a few stray gleams steal into the inner sanctuary, I throw myself down among the tall grass by the trickling stream; and, as I lie close to the earth, a thousand unknown plants are noticed by me: when I hear the buzz of the little world among the stalks, and grow familiar with the countless indescribable forms of the insects and flies, then I feel the presence of the Almighty, who formed us in his own image, and the breath of that universal love which bears and sustains us, as it floats around us in an eternity of bliss; and then, my friend, when darkness overspreads my eyes, and heaven and earth seem to dwell in my soul and absorb its power, like the form of a beloved mistress, then I often think with longing, Oh, would I could describe these conceptions, could impress upon paper all that is living so full and warm within me, that it might be the mirror of my soul, as my soul is the mirror of the infinite God! O my friend -- but it is too much for my strength -- I sink under the weight of the splendour of these visions! A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls like mine. 
I am so happy, my dear friend, so absorbed in the exquisite sense of mere tranquil existence, that I neglect my talents. I should be incapable of drawing a single stroke at the present moment; and yet I feel that I never was a greater artist than now. When, while the lovely valley teems with vapour around me, and the meridian sun strikes the upper surface of the impenetrable foliage of my trees, and but a few stray gleams steal into the inner sanctuary, I throw myself down among the tall grass by the trickling stream; and, as I lie close to the earth, a thousand unknown plants are noticed by me: when I hear the buzz of the little world among the stalks, and grow familiar with the countless indescribable forms of the insects and flies, then I feel the presence of the Almighty, who formed us in his own image, and the breath of that universal love which bears and sustains us, as it floats around us in an eternity of bliss; and then, my friend, when darkness overspreads my eyes, and heaven and earth seem to dwell in my soul and absorb its power, like the form of a beloved mistress, then I often think with longing, Oh, would I could describe these conceptions, could impress upon paper all that is living so full and warm within me, that it might be the mirror of my soul, as my soul is the mirror of the infinite God! O my friend -- but it is too much for my strength -- I sink under the weight of the splendour of these visions! A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls like mine. I am so happy, my dear friend, so absorbed in the exquisite sense of mere tranquil existence, that I neglect my talents. I should be incapable of drawing a single stroke at the present moment; and yet I feel that I never was a greater artist than now. 
When, while the lovely valley teems with vapour around me, and the meridian sun strikes the upper surface of the impenetrable foliage of my trees, and but a few stray gleams steal into the inner sanctuary, I throw myself down among the tall grass by the trickling stream; and, as I lie close to the earth, a thousand unknown plants are noticed by me: when I hear the buzz of the little world among the stalks, and grow familiar with the countless indescribable forms of the insects and flies, then I feel the presence of the Almighty, who formed us in his own image, and the breath of that universal love which bears and sustains us, as it floats around us in an eternity of bliss; and then, my friend, when darkness overspreads my eyes, and heaven and earth seem to dwell in my soul and absorb its power, like the form of a beloved mistress, then I often think with longing, Oh, would I could describe these conceptions, could impress upon paper all that is living so full and warm within me, that it might be the mirror of my soul, as my soul is the mirror of the infinite God! O my friend -- but it is too much for my strength -- I sink under the weight of the splendour of these visions!A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls".split(" ")
8 | corpus4 = "Проснувшись однажды утром после беспокойного сна, Грегор Замза обнаружил, что он у себя в постели превратился в страшное насекомое. Лежа на панцирнотвердой спине, он видел, стоило ему приподнять голову, свой коричневый, выпуклый, разделенный дугообразными чешуйками живот, на верхушке которого еле держалось готовое вот-вот окончательно сползти одеяло. Его многочисленные, убого тонкие по сравнению с остальным телом ножки беспомощно копошились у него перед глазами. «Что со мной случилось?» – подумал он. Это не было сном. Его комната, настоящая, разве что слишком маленькая, но обычная комната, мирно покоилась в своих четырех хорошо знакомых стенах. Над столом, где были разложены распакованные образцы сукон – Замза был коммивояжером, – висел портрет, который он недавно вырезал из иллюстрированного журнала и вставил в красивую золоченую рамку. На портрете была изображена дама в меховой шляпе и боа, она сидела очень прямо и протягивала зрителю тяжелую меховую муфту, в которой целиком исчезала ее рука. Затем взгляд Грегора устремился в окно, и пасмурная погода – слышно было, как по жести подоконника стучат капли дождя – привела его и вовсе в грустное настроение. «Хорошо бы еще немного поспать и забыть всю эту чепуху», – подумал он, но это было совершенно неосуществимо, он привык спать на правом боку, а в теперешнем своем состоянии он никак не мог принять этого положения. С какой бы силой ни поворачивался он на правый бок, он неизменно сваливался опять на спину. Закрыв глаза, чтобы не видеть своих барахтающихся ног, он проделал это добрую сотню раз и отказался от этих попыток только тогда, когда почувствовал какую-то неведомую дотоле, тупую и слабую боль в боку. «Ах ты, господи, – подумал он, – какую я выбрал хлопотную профессию! Изо дня в день в разъездах. 
Деловых волнений куда больше, чем на месте, в торговом доме, а кроме того, изволь терпеть тяготы дороги, думай о расписании поездов, мирись с плохим, нерегулярным питанием, завязывай со все новыми и новыми людьми недолгие, никогда не бывающие сердечными отношения. Черт бы побрал все это!» Он почувствовал вверху живота легкий зуд; медленно подвинулся на спине к прутьям кровати, чтобы удобнее было поднять голову; нашел зудевшее место, сплошь покрытое, как оказалось, белыми непонятными точечками; хотел было ощупать это место одной из ножек, но сразу отдернул ее, ибо даже простое прикосновение вызвало у него, Грегора, озноб. Он соскользнул в прежнее свое положение. «От этого раннего вставания, – подумал он, – можно совсем обезуметь. Человек должен высыпаться. Другие коммивояжеры живут, как одалиски. Когда я, например, среди дня возвращаюсь в гостиницу, чтобы переписать полученные заказы, эти господа только завтракают. А осмелься я вести себя так, мои хозяин выгнал бы меня сразу. Кто знает, впрочем, может быть, это было бы даже очень хорошо для меня. Если бы я не сдерживался ради родителей, я бы давно заявил об уходе, я бы подошел к своему хозяину и выложил ему все, что о нем думаю. Он бы так и свалился с конторки! Странная у него манера – садиться на конторку и с ее высоты разговаривать со служащим, который вдобавок вынужден подойти вплотную к конторке из-за того, что хозяин туг на ухо. Однако надежда еще не совсем потеряна: как только я накоплю денег, чтобы выплатить долг моих родителей – на это уйдет еще лет пять-шесть, – я так и поступлю. Тут-то мы и распрощаемся раз и навсегда. А пока что надо подниматься, мой поезд отходит в пять». И он взглянул на будильник, который тикал на сундуке. «Боже правый!» – подумал он. Было половина седьмого, и стрелки спокойно двигались дальше, было даже больше половины, без малого уже три четверти. Неужели будильник не звонил? С кровати было видно, что он поставлен правильно, на четыре часа; и он, несомненно, звонил. 
Но как можно было спокойно спать под этот сотрясающий мебель трезвон? Ну, спал-то он неспокойно, но, видимо, крепко. Однако что делать теперь? Следующий поезд уходит в семь часов; чтобы поспеть на него, он должен отчаянно торопиться, а набор образцов еще не упакован, да и сам он отнюдь не чувствует себя свежим и легким на подъем. И даже поспей он на поезд, хозяйского разноса ему все равно не избежать – ведь рассыльный торгового дома дежурил у пятичасового поезда и давно доложил о его, Грегора, опоздании. Рассыльный, человек бесхарактерный и неумный, был ставленником хозяина. А что, если сказаться больным? Но это было бы крайне неприятно и показалось бы подозрительным, ибо за пятилетнюю свою службу Грегор ни разу еще не болел. Хозяин, конечно, привел бы врача больничной кассы и стал попрекать родителей сыном-лентяем, отводя любые возражения ссылкой на этого врача, по мнению которого все люди на свете совершенно здоровы и только не любят работать. И разве в данном случае он был бы так уж неправ? Если не считать сонливости, действительно странной после такого долгого сна, Грегор и в самом деле чувствовал себя превосходно и был даже чертовски голоден.Проснувшись однажды утром после беспокойного сна, Грегор Замза обнаружил, что он у себя в постели превратился в страшное насекомое. Лежа на панцирнотвердой спине, он видел, стоило ему приподнять голову, свой коричневый, выпуклый, разделенный дугообразными чешуйками живот, на верхушке которого еле держалось готовое вот-вот окончательно сползти одеяло. Его многочисленные, убого тонкие по сравнению с остальным телом ножки беспомощно копошились у него перед глазами. «Что со мной случилось?» – подумал он. Это не было сном. Его комната, настоящая, разве что слишком маленькая, но обычная комната, мирно покоилась в своих четырех хорошо знакомых стенах. 
Над столом, где были разложены распакованные образцы сукон – Замза был коммивояжером, – висел портрет, который он недавно вырезал из иллюстрированного журнала и вставил в красивую золоченую рамку. На портрете была изображена дама в меховой шляпе и боа, она сидела очень прямо и протягивала зрителю тяжелую меховую муфту, в которой целиком исчезала ее рука. Затем взгляд Грегора устремился в окно, и пасмурная погода – слышно было, как по жести подоконника стучат капли дождя – привела его и вовсе в грустное настроение. «Хорошо бы еще немного поспать и забыть всю эту чепуху», – подумал он, но это было совершенно неосуществимо, он привык спать на правом боку, а в теперешнем своем состоянии он никак не мог принять этого положения. С какой бы силой ни поворачивался он на правый бок, он неизменно сваливался опять на спину.".split(" ")
9 | corpus5 = "Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen letzten Blick zurück auf die Skyline seiner Heimatstadt Buchstabhausen, die Headline von Alphabetdorf und die Subline seiner eigenen Straße, der Zeilengasse. Wehmütig lief ihm eine rhetorische Frage über die Wange, dann setzte es seinen Weg fort. Unterwegs traf es eine Copy. Die Copy warnte das Blindtextchen, da, wo sie herkäme wäre sie zigmal umgeschrieben worden und alles, was von ihrem Ursprung noch übrig wäre, sei das Wort \"und\" und das Blindtextchen solle umkehren und wieder in sein eigenes, sicheres Land zurückkehren. Doch alles Gutzureden konnte es nicht überzeugen und so dauerte es nicht lange, bis ihm ein paar heimtückische Werbetexter auflauerten, es mit Longe und Parole betrunken machten und es dann in ihre Agentur schleppten, wo sie es für ihre Projekte wieder und wieder mißbrauchten. Und wenn es nicht umgeschrieben wurde, dann benutzen Sie es immernoch. Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. 
Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen letzten Blick zurück auf die Skyline seiner Heimatstadt Buchstabhausen, die Headline von Alphabetdorf und die Subline seiner eigenen Straße, der Zeilengasse. Wehmütig lief ihm eine rhetorische Frage über die Wange, dann setzte es seinen Weg fort. Unterwegs traf es eine Copy. Die Copy warnte das Blindtextchen, da, wo sie herkäme wäre sie zigmal umgeschrieben worden und alles, was von ihrem Ursprung noch übrig wäre, sei das Wort \"und\" und das Blindtextchen solle umkehren und wieder in sein eigenes, sicheres Land zurückkehren. Doch alles Gutzureden konnte es nicht überzeugen und so dauerte es nicht lange, bis ihm ein paar heimtückische Werbetexter auflauerten, es mit Longe und Parole betrunken machten und es dann in ihre Agentur schleppten, wo sie es für ihre Projekte wieder und wieder mißbrauchten. Und wenn es nicht umgeschrieben wurde, dann benutzen Sie es immernoch. Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. 
Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen letzten Blick zurück auf die Skyline seiner Heimatstadt Buchstabhausen, die Headline von Alphabetdorf und die Subline seiner eigenen Straße, der Zeilengasse. Wehmütig lief ihm eine rhetorische Frage über die Wange, dann setzte es seinen Weg fort. Unterwegs traf es eine Copy. Die Copy warnte das Blindtextchen, da, wo sie herkäme wäre sie zigmal umgeschrieben worden und alles, was von ihrem Ursprung noch übrig wäre, sei das Wort \"und\" und das Blindtextchen solle umkehren und wieder in sein eigenes, sicheres Land zurückkehren. Doch alles Gutzureden konnte es nicht überzeugen und so dauerte es nicht lange, bis ihm ein paar heimtückische Werbetexter auflauerten, es mit Longe und Parole betrunken machten und es dann in ihre Agentur schleppten, wo sie es für ihre Projekte wieder und wieder mißbrauchten. Und wenn es nicht umgeschrieben wurde, dann benutzen Sie es immernoch.Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. 
Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen".split(" ")
10 | corpus6 = "米くを舵4物委らご氏松ハナテフ月関ソ時平ふいの博情れじフ牟万い元56園フメヤオ試図ロツヤ未備王こと傷喫羅踊んゆし。栃ユヱオ書著作ユソツロ英祉業ア大課ご権質フべ空8午キ切軟づン著郎そゃす格町採ヱオマコ処8付国ムハチア究表でなだ際無ロミヱ地兵ぴげ庭体すク発抜爆位や。楽富むゆず盛航カナセ携代ハ本高きた員59今骸ンラえぜ城解イケ穴訴ぽぎ属住ヤケトヌ抱点ト広注厚でて。 国リ出難セユメ軍手ヘカウ画形サヲシ猛85用ヲキミ心死よしと身処ケヨミオ教主ーぽ事業んく字国たさょ図能シミスヤ社8板ル岡世58次戒知院んれり。市メ誘根カ数問禁竹ゃれえみ給辺のでみき今二ぎさ裕止過こクすと無32郎所ラた生展ヌヘス成度慣葬勇厘ばてか。室ゃ下携疲ム色権がぽりっ銃週ノオ姫千テム健蔵い研手ッ放容ル告属め旅側26企サノヨ宅都福ぞ通待ちぴね種脳イど労希望義通むン。 罰しい続負せ著低たル異師ユハワ東添質コチ転集ルヤ雇聴約ヒ前統らた情厳ゆさでや真胸や有披暑棚豆ゆぼたけ。盛ワセロナ情競クるっわ講3音ずをせ少地めしぜょ手63明視れに判企ヒヌエソ求総58特本ね井比ユラキ禁頭馬るゅリす能率率かがさわ。葉サソ医郡ヱヘソ労帰ナケスミ救写ワヘ株審ネヒニミ安逮イ人画ラ涯車はラ極騒りなド件5級ンかふー劇41著ぱぐ凱討だ文世ぶづどま界善魅マ渓経競融れがや。 連ーぜらご模分ッ視外ばフく運発群ほぼづ育越一ほごクけ案募ヲイソ治会イせフ製君ぜた漢村1変リヒ構5際ツ御文ヲ臭入さドぼ代書ハケ引技ろみれ回観注倉徹ぱ。論ラづ海要サ情座ゃり齢宣ラモエ芸化エマホ覧催回ら戦69本外ト葬岳な政画か連針ぴリフず。約ル闘辺ぽ経2応掲ホサアラ塾小コラ画決クノオ上室レヌヱ勝逮ぜるえむ責豊チノ明意ひけ訟6碁草メタチエ財午召喝塊む。 決めでわ名金つけレわ続人県約ぽぼす尾腹ユサ戦載リシ護賀レモフツ重涯ニ治者むんっみ職更カタチレ提話2何ワ責東まけげふ能政ヌ供禁がびてわ提改倶れめ。読み担後ぽ安加ぎ論鹿ツ統最お気麻月つじもあ竜思いろめ判必満理トコ文連ムイウハ寄串ざほびー。文ゆこっ向27年メイ便能ノセヲ待1王スねたゆ伝派んね点過カト治読よにきべ使人スシ都言え阻8割べづえみ注引敷的岳犠眠どそ。 学用イだ医客開ロ供界もぞだ実隆モイヌ務坂ナコヲ権野ろづ初場ぱ低会づぱじ新倒コ化政レ止奮浸猪ッわえづ。形いやリ要帰ほまむだ業領スル必打さ島14巻リ集日ネヘホタ面幅ち写上そぴ円図ムタコモ報使イわざと会催ヤヲ康証をドぶレ盤岡ホハツ作29管しをめ公問懐蓄っさ。来ゆぼあぱ投秋シ語右ぐ身靖かば辛握捕家記ヘワ神岐囲づ毘観メテクツ政73夕罪57需93誌飲査仁さ。 変レめ束球よんま会特ヱコ聞重だ史純ーどる件32浦レぴよゃ上強ネラリロ査従セユヤ専棋光レ作表ひぶ予正ぜーな誉確フス函6報円ス進治ね能営済否雄でわょ。42生型ば着続ア短実ぎおめび前環闘ラヤヲル診均っとにの声公トヱテマ整試椅情久妊舌頃ざとっく。品キチトテ阿国ラら受87世ヲフセリ川86個ーょぼげ危子ヘレカメ無会ぱかへ事通んかて電条ロツ徴商ぶぞそを居暑メ害広せもがり禁応レミヲ応響割壮憶はぱ。 千れンが織財メニ況界ネトレミ学豊フオホシ近月レたやご的罪ょな菱技ちる警栗エセ提89林危氷48参ア説森クキヒヱ薬社ホコエリ負和ルび紀下ケミイ掲歳特ごず扱底ク護木連ちクを各形ばすか。変ぱなれ町7融ヌ街準以タユヘム質裕ぶで遺語俊ぎずょ事金文キ写多山ーゆに歩帯すで会世クぜよ論写ヲ達71林危氷5間続ぎぜび高怠す。 係8青け応著ミ戦条ナヘネカ思79未ぎ算伊をゃ泉人ーづ需説っ畑鹿27軽ラソツ権2促千護ルロナカ開国ケ暴嶋ご池表だ。佐フナ訪麻はてせば勝効をあ医戦画とさわぴ者両すいあ並来んば載食ぴ件友頂業へえぞ魚祝ネラ聞率スコリケ始全ンこび夫出ドふ今布うぎふゅ実克即哉循やしんな。 暮す備54依紀てッん末刊と柔称むてス無府ケイ変壌をぱ汁連フマス海世ヌ中負知問ナヘケ純推ひ読着ヒ言若私軽れ。掲けフむ王本オコ線人をっさ必和断セソヲハ図芸ちかな防長りぶは投新意相ツ並5余セ職岳ぞ端古空援そ。森ヨエチ題5東っ自兄ち暴5近鹿横ト的京ハ安氷ナキ深際ぎ並節くスむの権工ほルせ京49効タムチ処三ぞぴラ済国ずっ文経ヘトミ水分準そが。".split(" ")
11 | corpus7 = "AbortMultipartUpload CompleteMultipartUpload CopyObject CreateBucket CreateMultipartUpload DeleteBucket DeleteBucketAnalyticsConfiguration DeleteBucketCors DeleteBucketEncryption DeleteBucketIntelligentTieringConfiguration DeleteBucketInventoryConfiguration DeleteBucketLifecycle DeleteBucketMetricsConfiguration DeleteBucketOwnershipControls DeleteBucketPolicy DeleteBucketReplication DeleteBucketTagging DeleteBucketWebsite DeleteObject DeleteObjects DeleteObjectTagging DeletePublicAccessBlock GetBucketAccelerateConfiguration GetBucketAcl GetBucketAnalyticsConfiguration GetBucketCors GetBucketEncryption GetBucketIntelligentTieringConfiguration GetBucketInventoryConfiguration GetBucketLifecycle GetBucketLifecycleConfiguration GetBucketLocation GetBucketLogging GetBucketMetricsConfiguration GetBucketNotification GetBucketNotificationConfiguration GetBucketOwnershipControls GetBucketPolicy GetBucketPolicyStatus GetBucketReplication GetBucketRequestPayment GetBucketTagging GetBucketVersioning GetBucketWebsite GetObject GetObjectAcl GetObjectAttributes GetObjectLegalHold GetObjectLockConfiguration GetObjectRetention GetObjectTagging GetObjectTorrent GetPublicAccessBlock HeadBucket HeadObject ListBucketAnalyticsConfigurations ListBucketIntelligentTieringConfigurations ListBucketInventoryConfigurations ListBucketMetricsConfigurations ListBuckets ListMultipartUploads ListObjects ListObjectsV2 ListObjectVersions ListParts PutBucketAccelerateConfiguration PutBucketAcl PutBucketAnalyticsConfiguration PutBucketCors PutBucketEncryption PutBucketIntelligentTieringConfiguration PutBucketInventoryConfiguration PutBucketLifecycle PutBucketLifecycleConfiguration PutBucketLogging PutBucketMetricsConfiguration PutBucketNotification PutBucketNotificationConfiguration PutBucketOwnershipControls PutBucketPolicy PutBucketReplication PutBucketRequestPayment PutBucketTagging PutBucketVersioning PutBucketWebsite PutObject PutObjectAcl PutObjectLegalHold 
PutObjectLockConfiguration PutObjectRetention PutObjectTagging PutPublicAccessBlock RestoreObject SelectObjectContent UploadPart UploadPartCopy WriteGetObjectResponse CreateAccessPoint CreateAccessPointForObjectLambda CreateBucket CreateJob CreateMultiRegionAccessPoint DeleteAccessPoint DeleteAccessPointForObjectLambda DeleteAccessPointPolicy DeleteAccessPointPolicyForObjectLambda DeleteBucket DeleteBucketLifecycleConfiguration DeleteBucketPolicy DeleteBucketReplication DeleteBucketTagging DeleteJobTagging DeleteMultiRegionAccessPoint DeletePublicAccessBlock DeleteStorageLensConfiguration DeleteStorageLensConfigurationTagging DescribeJob DescribeMultiRegionAccessPointOperation GetAccessPoint GetAccessPointConfigurationForObjectLambda GetAccessPointForObjectLambda GetAccessPointPolicy GetAccessPointPolicyForObjectLambda GetAccessPointPolicyStatus GetAccessPointPolicyStatusForObjectLambda GetBucket GetBucketLifecycleConfiguration GetBucketPolicy GetBucketReplication GetBucketTagging GetBucketVersioning GetJobTagging GetMultiRegionAccessPoint GetMultiRegionAccessPointPolicy GetMultiRegionAccessPointPolicyStatus GetMultiRegionAccessPointRoutes GetPublicAccessBlock GetStorageLensConfiguration GetStorageLensConfigurationTagging ListAccessPoints ListAccessPointsForObjectLambda ListJobs ListMultiRegionAccessPoints ListRegionalBuckets ListStorageLensConfigurations PutAccessPointConfigurationForObjectLambda PutAccessPointPolicy PutAccessPointPolicyForObjectLambda PutBucketLifecycleConfiguration PutBucketPolicy PutBucketReplication PutBucketTagging PutBucketVersioning PutJobTagging PutMultiRegionAccessPointPolicy PutPublicAccessBlock PutStorageLensConfiguration PutStorageLensConfigurationTagging SubmitMultiRegionAccessPointRoutes UpdateJobPriority UpdateJobStatus".split(" ")
12 | corpus8 = Path("/usr/share/dict/words").read_text().splitlines()
# Timing section of main(): corpus1..corpus8 are the word lists built above.
#
# For every corpus, the dict is filled and then read back REPETITIONS times;
# each individual put and get is timed and the per-operation averages printed.
#
# perf_counter_ns is a monotonic, high-resolution clock intended for timing
# short durations; the wall clock (time_ns) can jump and may be too coarse
# for single dict operations. Imported here so this section is self-contained.
from time import perf_counter_ns

REPETITIONS = 10

# Named `corpora` / `checksum` so the builtins all() and sum() are not shadowed.
corpora = [corpus1, corpus2, corpus3, corpus4, corpus5, corpus6, corpus7, corpus8]
for j, corpus in enumerate(corpora):
    if not corpus:
        # The averages below divide by len(corpus); an empty corpus (e.g. an
        # empty words file) would otherwise raise ZeroDivisionError.
        print(f"Corpus {j + 1}")
        print("empty corpus, skipped")
        continue

    total_put = 0
    total_get = 0
    checksum = 0
    for _ in range(REPETITIONS):
        # Reset per repetition: only the last repetition's checksum is printed.
        checksum = 0
        d = {}

        # Put: map every word to its position (later duplicates overwrite).
        for i, c in enumerate(corpus):
            tik = perf_counter_ns()
            d[c] = i
            tok = perf_counter_ns()
            total_put += tok - tik

        # Get: read every word back; the values are summed so the lookups
        # have an observable result.
        # NOTE(review): the lookups reuse the very same str objects that were
        # inserted, so CPython can use the cached hash and an identity match;
        # lookups with freshly built keys would likely be slower.
        for c in corpus:
            tik = perf_counter_ns()
            a = d[c]
            tok = perf_counter_ns()
            total_get += tok - tik
            checksum += a

    print(f"Corpus {j + 1}")
    print(f"Avg time put: {(total_put / REPETITIONS) / len(corpus)}")
    print(f"Avg time get: {(total_get / REPETITIONS) / len(corpus)}")
    print(checksum)
37 |
38 | if __name__ == "__main__":
39 | main()
40 |
--------------------------------------------------------------------------------
/benchmark_other_languages/python/hash_functions/benchmark.py:
--------------------------------------------------------------------------------
1 | from time import time_ns
2 |
def benchmark(corpus: list[str]):
    """Time the built-in ``hash()`` over ``corpus`` and print one summary line.

    Every element of ``corpus`` is hashed in each of 20 passes. The printed
    line reports:

    * ``Avg time``: mean nanoseconds per ``hash()`` call,
    * ``total elements`` / ``unique elements``: ``len(corpus)`` and the number
      of distinct keys,
    * ``collisions``: distinct keys divided by distinct hash values,
    * ``collisions % 512``: distinct keys divided by distinct ``hash % 512``
      values,
    * ``keys min`` / ``avg`` / ``max``: length statistics of the distinct keys.

    Args:
        corpus: the keys to hash. An empty list prints a short notice instead
            of raising ``ZeroDivisionError``.

    Returns:
        None. The result is written to stdout.
    """
    # Monotonic, high-resolution clock intended for timing short durations;
    # imported locally so the function brings its own dependency into scope.
    from time import perf_counter_ns

    if not corpus:
        # Every average below divides by len(corpus) or the unique-key count.
        print("Avg time: n/a, total elements: 0, unique elements: 0")
        return

    passes = 20
    unique_keys = set(corpus)
    # The same hash values are produced in every pass, so these sets can be
    # shared across passes without changing the reported counts.
    hashes = set()
    hashes_mod_512 = set()
    total = 0
    for _ in range(passes):
        for key in corpus:
            # CPython stores a str's hash on the object after the first call,
            # so hashing `key` itself would only time a cached lookup. A round
            # trip through bytes yields an equal but new str whose hash has
            # not been computed yet (empty and one-character strings may still
            # be shared singletons). The copy is made outside the timed region.
            fresh = key.encode("utf-8", "surrogatepass").decode("utf-8", "surrogatepass")
            tik = perf_counter_ns()
            h = hash(fresh)
            tok = perf_counter_ns()
            total += tok - tik
            hashes.add(h)
            hashes_mod_512.add(h % 512)

    # Key-length statistics via the builtins instead of shadowing min/max/sum.
    lengths = [len(key) for key in unique_keys]
    shortest = min(lengths)
    longest = max(lengths)
    avg = sum(lengths) / len(lengths)

    print(f"Avg time: {(total / passes) / len(corpus)}, total elements: {len(corpus)}, unique elements: {len(unique_keys)}, collisions: {len(unique_keys) / len(hashes)}, collisions % 512: {len(unique_keys) / len(hashes_mod_512)}, keys min: {shortest}, avg: {avg}, max: {longest}")
30 |
31 |
32 | def main():
33 | corpus1 = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quisque orci urna, pretium et porta ac, porttitor sit amet sem. Fusce sagittis lorem neque, vitae sollicitudin elit suscipit et. In interdum convallis nisl in ornare. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Aliquam erat volutpat. Morbi mollis iaculis lectus ac tincidunt. Fusce nisi lacus, semper eu dignissim et, malesuada non mi. Sed euismod urna vel elit faucibus, eu bibendum ante fringilla. Curabitur tempus in turpis at mattis. Aliquam erat volutpat. Donec maximus elementum felis, sit amet dignissim augue tincidunt blandit. Aliquam fermentum, est eu mollis.".split(" ")
34 | corpus2 = "But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure? On the other hand, we denounce with righteous indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee the pain and trouble that are bound to ensue; and equal blame belongs to those who fail in their duty through weakness of will, which is the same as saying through shrinking from toil and pain. These cases are perfectly simple and easy to distinguish. In a free hour, when our power of choice is untrammelled and when nothing prevents our being able to do what we like best, every pleasure is to be welcomed and every pain avoided. But in certain circumstances and owing to the claims of duty or the obligations of business it will frequently occur that pleasures have to be repudiated and annoyances accepted. The wise man therefore always holds in these matters to this principle of selection: he rejects pleasures to secure other greater pleasures, or else he endures pains to avoid worse pains. 
But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure? On the other hand, we denounce with righteous indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee the pain and trouble that are bound to ensue; and equal blame belongs to those who fail in their duty through weakness of will, which is the same as saying through shrinking from toil and pain. These cases are perfectly simple and easy to distinguish. In a free hour, when our power of choice is untrammelled and when nothing prevents our being able to do what we like best, every pleasure is to be welcomed and every pain avoided. But in certain circumstances and owing to the claims of duty or the obligations of business it will frequently occur that pleasures have to be repudiated and annoyances accepted. 
The wise man therefore always holds in these matters to this principle of selection: he rejects pleasures to secure other greater pleasures, or else he endures pains to avoid worse pains.But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure? On the other hand, we denounce with righteous indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee the pain and trouble that are bound to ensue; and equal blame belongs to those who fail in their duty through weakness of will, which is the same as saying through shrinking from toil and pain. These cases are perfectly simple and easy to distinguish. In a free hour, when our power of choice is untrammelled and when nothing prevents our being able to do what we like best, every pleasure is to be welcomed and every pain avoided. But in certain circumstances and owing to the claims of duty or the obligations of business it will frequently occur that pleasures have to be repudiated and annoyances accepted. 
The wise man therefore always holds in these matters to this principle of selection:".split(" ")
35 | corpus3 = "A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls like mine. I am so happy, my dear friend, so absorbed in the exquisite sense of mere tranquil existence, that I neglect my talents. I should be incapable of drawing a single stroke at the present moment; and yet I feel that I never was a greater artist than now. When, while the lovely valley teems with vapour around me, and the meridian sun strikes the upper surface of the impenetrable foliage of my trees, and but a few stray gleams steal into the inner sanctuary, I throw myself down among the tall grass by the trickling stream; and, as I lie close to the earth, a thousand unknown plants are noticed by me: when I hear the buzz of the little world among the stalks, and grow familiar with the countless indescribable forms of the insects and flies, then I feel the presence of the Almighty, who formed us in his own image, and the breath of that universal love which bears and sustains us, as it floats around us in an eternity of bliss; and then, my friend, when darkness overspreads my eyes, and heaven and earth seem to dwell in my soul and absorb its power, like the form of a beloved mistress, then I often think with longing, Oh, would I could describe these conceptions, could impress upon paper all that is living so full and warm within me, that it might be the mirror of my soul, as my soul is the mirror of the infinite God! O my friend -- but it is too much for my strength -- I sink under the weight of the splendour of these visions! A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls like mine. 
I am so happy, my dear friend, so absorbed in the exquisite sense of mere tranquil existence, that I neglect my talents. I should be incapable of drawing a single stroke at the present moment; and yet I feel that I never was a greater artist than now. When, while the lovely valley teems with vapour around me, and the meridian sun strikes the upper surface of the impenetrable foliage of my trees, and but a few stray gleams steal into the inner sanctuary, I throw myself down among the tall grass by the trickling stream; and, as I lie close to the earth, a thousand unknown plants are noticed by me: when I hear the buzz of the little world among the stalks, and grow familiar with the countless indescribable forms of the insects and flies, then I feel the presence of the Almighty, who formed us in his own image, and the breath of that universal love which bears and sustains us, as it floats around us in an eternity of bliss; and then, my friend, when darkness overspreads my eyes, and heaven and earth seem to dwell in my soul and absorb its power, like the form of a beloved mistress, then I often think with longing, Oh, would I could describe these conceptions, could impress upon paper all that is living so full and warm within me, that it might be the mirror of my soul, as my soul is the mirror of the infinite God! O my friend -- but it is too much for my strength -- I sink under the weight of the splendour of these visions! A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls like mine. I am so happy, my dear friend, so absorbed in the exquisite sense of mere tranquil existence, that I neglect my talents. I should be incapable of drawing a single stroke at the present moment; and yet I feel that I never was a greater artist than now. 
When, while the lovely valley teems with vapour around me, and the meridian sun strikes the upper surface of the impenetrable foliage of my trees, and but a few stray gleams steal into the inner sanctuary, I throw myself down among the tall grass by the trickling stream; and, as I lie close to the earth, a thousand unknown plants are noticed by me: when I hear the buzz of the little world among the stalks, and grow familiar with the countless indescribable forms of the insects and flies, then I feel the presence of the Almighty, who formed us in his own image, and the breath of that universal love which bears and sustains us, as it floats around us in an eternity of bliss; and then, my friend, when darkness overspreads my eyes, and heaven and earth seem to dwell in my soul and absorb its power, like the form of a beloved mistress, then I often think with longing, Oh, would I could describe these conceptions, could impress upon paper all that is living so full and warm within me, that it might be the mirror of my soul, as my soul is the mirror of the infinite God! O my friend -- but it is too much for my strength -- I sink under the weight of the splendour of these visions!A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls".split(" ")
36 | corpus4 = "Проснувшись однажды утром после беспокойного сна, Грегор Замза обнаружил, что он у себя в постели превратился в страшное насекомое. Лежа на панцирнотвердой спине, он видел, стоило ему приподнять голову, свой коричневый, выпуклый, разделенный дугообразными чешуйками живот, на верхушке которого еле держалось готовое вот-вот окончательно сползти одеяло. Его многочисленные, убого тонкие по сравнению с остальным телом ножки беспомощно копошились у него перед глазами. «Что со мной случилось?» – подумал он. Это не было сном. Его комната, настоящая, разве что слишком маленькая, но обычная комната, мирно покоилась в своих четырех хорошо знакомых стенах. Над столом, где были разложены распакованные образцы сукон – Замза был коммивояжером, – висел портрет, который он недавно вырезал из иллюстрированного журнала и вставил в красивую золоченую рамку. На портрете была изображена дама в меховой шляпе и боа, она сидела очень прямо и протягивала зрителю тяжелую меховую муфту, в которой целиком исчезала ее рука. Затем взгляд Грегора устремился в окно, и пасмурная погода – слышно было, как по жести подоконника стучат капли дождя – привела его и вовсе в грустное настроение. «Хорошо бы еще немного поспать и забыть всю эту чепуху», – подумал он, но это было совершенно неосуществимо, он привык спать на правом боку, а в теперешнем своем состоянии он никак не мог принять этого положения. С какой бы силой ни поворачивался он на правый бок, он неизменно сваливался опять на спину. Закрыв глаза, чтобы не видеть своих барахтающихся ног, он проделал это добрую сотню раз и отказался от этих попыток только тогда, когда почувствовал какую-то неведомую дотоле, тупую и слабую боль в боку. «Ах ты, господи, – подумал он, – какую я выбрал хлопотную профессию! Изо дня в день в разъездах. 
Деловых волнений куда больше, чем на месте, в торговом доме, а кроме того, изволь терпеть тяготы дороги, думай о расписании поездов, мирись с плохим, нерегулярным питанием, завязывай со все новыми и новыми людьми недолгие, никогда не бывающие сердечными отношения. Черт бы побрал все это!» Он почувствовал вверху живота легкий зуд; медленно подвинулся на спине к прутьям кровати, чтобы удобнее было поднять голову; нашел зудевшее место, сплошь покрытое, как оказалось, белыми непонятными точечками; хотел было ощупать это место одной из ножек, но сразу отдернул ее, ибо даже простое прикосновение вызвало у него, Грегора, озноб. Он соскользнул в прежнее свое положение. «От этого раннего вставания, – подумал он, – можно совсем обезуметь. Человек должен высыпаться. Другие коммивояжеры живут, как одалиски. Когда я, например, среди дня возвращаюсь в гостиницу, чтобы переписать полученные заказы, эти господа только завтракают. А осмелься я вести себя так, мои хозяин выгнал бы меня сразу. Кто знает, впрочем, может быть, это было бы даже очень хорошо для меня. Если бы я не сдерживался ради родителей, я бы давно заявил об уходе, я бы подошел к своему хозяину и выложил ему все, что о нем думаю. Он бы так и свалился с конторки! Странная у него манера – садиться на конторку и с ее высоты разговаривать со служащим, который вдобавок вынужден подойти вплотную к конторке из-за того, что хозяин туг на ухо. Однако надежда еще не совсем потеряна: как только я накоплю денег, чтобы выплатить долг моих родителей – на это уйдет еще лет пять-шесть, – я так и поступлю. Тут-то мы и распрощаемся раз и навсегда. А пока что надо подниматься, мой поезд отходит в пять». И он взглянул на будильник, который тикал на сундуке. «Боже правый!» – подумал он. Было половина седьмого, и стрелки спокойно двигались дальше, было даже больше половины, без малого уже три четверти. Неужели будильник не звонил? С кровати было видно, что он поставлен правильно, на четыре часа; и он, несомненно, звонил. 
Но как можно было спокойно спать под этот сотрясающий мебель трезвон? Ну, спал-то он неспокойно, но, видимо, крепко. Однако что делать теперь? Следующий поезд уходит в семь часов; чтобы поспеть на него, он должен отчаянно торопиться, а набор образцов еще не упакован, да и сам он отнюдь не чувствует себя свежим и легким на подъем. И даже поспей он на поезд, хозяйского разноса ему все равно не избежать – ведь рассыльный торгового дома дежурил у пятичасового поезда и давно доложил о его, Грегора, опоздании. Рассыльный, человек бесхарактерный и неумный, был ставленником хозяина. А что, если сказаться больным? Но это было бы крайне неприятно и показалось бы подозрительным, ибо за пятилетнюю свою службу Грегор ни разу еще не болел. Хозяин, конечно, привел бы врача больничной кассы и стал попрекать родителей сыном-лентяем, отводя любые возражения ссылкой на этого врача, по мнению которого все люди на свете совершенно здоровы и только не любят работать. И разве в данном случае он был бы так уж неправ? Если не считать сонливости, действительно странной после такого долгого сна, Грегор и в самом деле чувствовал себя превосходно и был даже чертовски голоден.Проснувшись однажды утром после беспокойного сна, Грегор Замза обнаружил, что он у себя в постели превратился в страшное насекомое. Лежа на панцирнотвердой спине, он видел, стоило ему приподнять голову, свой коричневый, выпуклый, разделенный дугообразными чешуйками живот, на верхушке которого еле держалось готовое вот-вот окончательно сползти одеяло. Его многочисленные, убого тонкие по сравнению с остальным телом ножки беспомощно копошились у него перед глазами. «Что со мной случилось?» – подумал он. Это не было сном. Его комната, настоящая, разве что слишком маленькая, но обычная комната, мирно покоилась в своих четырех хорошо знакомых стенах. 
Над столом, где были разложены распакованные образцы сукон – Замза был коммивояжером, – висел портрет, который он недавно вырезал из иллюстрированного журнала и вставил в красивую золоченую рамку. На портрете была изображена дама в меховой шляпе и боа, она сидела очень прямо и протягивала зрителю тяжелую меховую муфту, в которой целиком исчезала ее рука. Затем взгляд Грегора устремился в окно, и пасмурная погода – слышно было, как по жести подоконника стучат капли дождя – привела его и вовсе в грустное настроение. «Хорошо бы еще немного поспать и забыть всю эту чепуху», – подумал он, но это было совершенно неосуществимо, он привык спать на правом боку, а в теперешнем своем состоянии он никак не мог принять этого положения. С какой бы силой ни поворачивался он на правый бок, он неизменно сваливался опять на спину.".split(" ")
37 | corpus5 = "Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen letzten Blick zurück auf die Skyline seiner Heimatstadt Buchstabhausen, die Headline von Alphabetdorf und die Subline seiner eigenen Straße, der Zeilengasse. Wehmütig lief ihm eine rhetorische Frage über die Wange, dann setzte es seinen Weg fort. Unterwegs traf es eine Copy. Die Copy warnte das Blindtextchen, da, wo sie herkäme wäre sie zigmal umgeschrieben worden und alles, was von ihrem Ursprung noch übrig wäre, sei das Wort \"und\" und das Blindtextchen solle umkehren und wieder in sein eigenes, sicheres Land zurückkehren. Doch alles Gutzureden konnte es nicht überzeugen und so dauerte es nicht lange, bis ihm ein paar heimtückische Werbetexter auflauerten, es mit Longe und Parole betrunken machten und es dann in ihre Agentur schleppten, wo sie es für ihre Projekte wieder und wieder mißbrauchten. Und wenn es nicht umgeschrieben wurde, dann benutzen Sie es immernoch. Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. 
Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen letzten Blick zurück auf die Skyline seiner Heimatstadt Buchstabhausen, die Headline von Alphabetdorf und die Subline seiner eigenen Straße, der Zeilengasse. Wehmütig lief ihm eine rhetorische Frage über die Wange, dann setzte es seinen Weg fort. Unterwegs traf es eine Copy. Die Copy warnte das Blindtextchen, da, wo sie herkäme wäre sie zigmal umgeschrieben worden und alles, was von ihrem Ursprung noch übrig wäre, sei das Wort \"und\" und das Blindtextchen solle umkehren und wieder in sein eigenes, sicheres Land zurückkehren. Doch alles Gutzureden konnte es nicht überzeugen und so dauerte es nicht lange, bis ihm ein paar heimtückische Werbetexter auflauerten, es mit Longe und Parole betrunken machten und es dann in ihre Agentur schleppten, wo sie es für ihre Projekte wieder und wieder mißbrauchten. Und wenn es nicht umgeschrieben wurde, dann benutzen Sie es immernoch. Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. 
Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen letzten Blick zurück auf die Skyline seiner Heimatstadt Buchstabhausen, die Headline von Alphabetdorf und die Subline seiner eigenen Straße, der Zeilengasse. Wehmütig lief ihm eine rhetorische Frage über die Wange, dann setzte es seinen Weg fort. Unterwegs traf es eine Copy. Die Copy warnte das Blindtextchen, da, wo sie herkäme wäre sie zigmal umgeschrieben worden und alles, was von ihrem Ursprung noch übrig wäre, sei das Wort \"und\" und das Blindtextchen solle umkehren und wieder in sein eigenes, sicheres Land zurückkehren. Doch alles Gutzureden konnte es nicht überzeugen und so dauerte es nicht lange, bis ihm ein paar heimtückische Werbetexter auflauerten, es mit Longe und Parole betrunken machten und es dann in ihre Agentur schleppten, wo sie es für ihre Projekte wieder und wieder mißbrauchten. Und wenn es nicht umgeschrieben wurde, dann benutzen Sie es immernoch.Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. 
Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen".split(" ")
38 | corpus6 = "米くを舵4物委らご氏松ハナテフ月関ソ時平ふいの博情れじフ牟万い元56園フメヤオ試図ロツヤ未備王こと傷喫羅踊んゆし。栃ユヱオ書著作ユソツロ英祉業ア大課ご権質フべ空8午キ切軟づン著郎そゃす格町採ヱオマコ処8付国ムハチア究表でなだ際無ロミヱ地兵ぴげ庭体すク発抜爆位や。楽富むゆず盛航カナセ携代ハ本高きた員59今骸ンラえぜ城解イケ穴訴ぽぎ属住ヤケトヌ抱点ト広注厚でて。 国リ出難セユメ軍手ヘカウ画形サヲシ猛85用ヲキミ心死よしと身処ケヨミオ教主ーぽ事業んく字国たさょ図能シミスヤ社8板ル岡世58次戒知院んれり。市メ誘根カ数問禁竹ゃれえみ給辺のでみき今二ぎさ裕止過こクすと無32郎所ラた生展ヌヘス成度慣葬勇厘ばてか。室ゃ下携疲ム色権がぽりっ銃週ノオ姫千テム健蔵い研手ッ放容ル告属め旅側26企サノヨ宅都福ぞ通待ちぴね種脳イど労希望義通むン。 罰しい続負せ著低たル異師ユハワ東添質コチ転集ルヤ雇聴約ヒ前統らた情厳ゆさでや真胸や有披暑棚豆ゆぼたけ。盛ワセロナ情競クるっわ講3音ずをせ少地めしぜょ手63明視れに判企ヒヌエソ求総58特本ね井比ユラキ禁頭馬るゅリす能率率かがさわ。葉サソ医郡ヱヘソ労帰ナケスミ救写ワヘ株審ネヒニミ安逮イ人画ラ涯車はラ極騒りなド件5級ンかふー劇41著ぱぐ凱討だ文世ぶづどま界善魅マ渓経競融れがや。 連ーぜらご模分ッ視外ばフく運発群ほぼづ育越一ほごクけ案募ヲイソ治会イせフ製君ぜた漢村1変リヒ構5際ツ御文ヲ臭入さドぼ代書ハケ引技ろみれ回観注倉徹ぱ。論ラづ海要サ情座ゃり齢宣ラモエ芸化エマホ覧催回ら戦69本外ト葬岳な政画か連針ぴリフず。約ル闘辺ぽ経2応掲ホサアラ塾小コラ画決クノオ上室レヌヱ勝逮ぜるえむ責豊チノ明意ひけ訟6碁草メタチエ財午召喝塊む。 決めでわ名金つけレわ続人県約ぽぼす尾腹ユサ戦載リシ護賀レモフツ重涯ニ治者むんっみ職更カタチレ提話2何ワ責東まけげふ能政ヌ供禁がびてわ提改倶れめ。読み担後ぽ安加ぎ論鹿ツ統最お気麻月つじもあ竜思いろめ判必満理トコ文連ムイウハ寄串ざほびー。文ゆこっ向27年メイ便能ノセヲ待1王スねたゆ伝派んね点過カト治読よにきべ使人スシ都言え阻8割べづえみ注引敷的岳犠眠どそ。 学用イだ医客開ロ供界もぞだ実隆モイヌ務坂ナコヲ権野ろづ初場ぱ低会づぱじ新倒コ化政レ止奮浸猪ッわえづ。形いやリ要帰ほまむだ業領スル必打さ島14巻リ集日ネヘホタ面幅ち写上そぴ円図ムタコモ報使イわざと会催ヤヲ康証をドぶレ盤岡ホハツ作29管しをめ公問懐蓄っさ。来ゆぼあぱ投秋シ語右ぐ身靖かば辛握捕家記ヘワ神岐囲づ毘観メテクツ政73夕罪57需93誌飲査仁さ。 変レめ束球よんま会特ヱコ聞重だ史純ーどる件32浦レぴよゃ上強ネラリロ査従セユヤ専棋光レ作表ひぶ予正ぜーな誉確フス函6報円ス進治ね能営済否雄でわょ。42生型ば着続ア短実ぎおめび前環闘ラヤヲル診均っとにの声公トヱテマ整試椅情久妊舌頃ざとっく。品キチトテ阿国ラら受87世ヲフセリ川86個ーょぼげ危子ヘレカメ無会ぱかへ事通んかて電条ロツ徴商ぶぞそを居暑メ害広せもがり禁応レミヲ応響割壮憶はぱ。 千れンが織財メニ況界ネトレミ学豊フオホシ近月レたやご的罪ょな菱技ちる警栗エセ提89林危氷48参ア説森クキヒヱ薬社ホコエリ負和ルび紀下ケミイ掲歳特ごず扱底ク護木連ちクを各形ばすか。変ぱなれ町7融ヌ街準以タユヘム質裕ぶで遺語俊ぎずょ事金文キ写多山ーゆに歩帯すで会世クぜよ論写ヲ達71林危氷5間続ぎぜび高怠す。 係8青け応著ミ戦条ナヘネカ思79未ぎ算伊をゃ泉人ーづ需説っ畑鹿27軽ラソツ権2促千護ルロナカ開国ケ暴嶋ご池表だ。佐フナ訪麻はてせば勝効をあ医戦画とさわぴ者両すいあ並来んば載食ぴ件友頂業へえぞ魚祝ネラ聞率スコリケ始全ンこび夫出ドふ今布うぎふゅ実克即哉循やしんな。 暮す備54依紀てッん末刊と柔称むてス無府ケイ変壌をぱ汁連フマス海世ヌ中負知問ナヘケ純推ひ読着ヒ言若私軽れ。掲けフむ王本オコ線人をっさ必和断セソヲハ図芸ちかな防長りぶは投新意相ツ並5余セ職岳ぞ端古空援そ。森ヨエチ題5東っ自兄ち暴5近鹿横ト的京ハ安氷ナキ深際ぎ並節くスむの権工ほルせ京49効タムチ処三ぞぴラ済国ずっ文経ヘトミ水分準そが。".split(" ")
39 | corpus7 = "AbortMultipartUpload CompleteMultipartUpload CopyObject CreateBucket CreateMultipartUpload DeleteBucket DeleteBucketAnalyticsConfiguration DeleteBucketCors DeleteBucketEncryption DeleteBucketIntelligentTieringConfiguration DeleteBucketInventoryConfiguration DeleteBucketLifecycle DeleteBucketMetricsConfiguration DeleteBucketOwnershipControls DeleteBucketPolicy DeleteBucketReplication DeleteBucketTagging DeleteBucketWebsite DeleteObject DeleteObjects DeleteObjectTagging DeletePublicAccessBlock GetBucketAccelerateConfiguration GetBucketAcl GetBucketAnalyticsConfiguration GetBucketCors GetBucketEncryption GetBucketIntelligentTieringConfiguration GetBucketInventoryConfiguration GetBucketLifecycle GetBucketLifecycleConfiguration GetBucketLocation GetBucketLogging GetBucketMetricsConfiguration GetBucketNotification GetBucketNotificationConfiguration GetBucketOwnershipControls GetBucketPolicy GetBucketPolicyStatus GetBucketReplication GetBucketRequestPayment GetBucketTagging GetBucketVersioning GetBucketWebsite GetObject GetObjectAcl GetObjectAttributes GetObjectLegalHold GetObjectLockConfiguration GetObjectRetention GetObjectTagging GetObjectTorrent GetPublicAccessBlock HeadBucket HeadObject ListBucketAnalyticsConfigurations ListBucketIntelligentTieringConfigurations ListBucketInventoryConfigurations ListBucketMetricsConfigurations ListBuckets ListMultipartUploads ListObjects ListObjectsV2 ListObjectVersions ListParts PutBucketAccelerateConfiguration PutBucketAcl PutBucketAnalyticsConfiguration PutBucketCors PutBucketEncryption PutBucketIntelligentTieringConfiguration PutBucketInventoryConfiguration PutBucketLifecycle PutBucketLifecycleConfiguration PutBucketLogging PutBucketMetricsConfiguration PutBucketNotification PutBucketNotificationConfiguration PutBucketOwnershipControls PutBucketPolicy PutBucketReplication PutBucketRequestPayment PutBucketTagging PutBucketVersioning PutBucketWebsite PutObject PutObjectAcl PutObjectLegalHold 
PutObjectLockConfiguration PutObjectRetention PutObjectTagging PutPublicAccessBlock RestoreObject SelectObjectContent UploadPart UploadPartCopy WriteGetObjectResponse CreateAccessPoint CreateAccessPointForObjectLambda CreateBucket CreateJob CreateMultiRegionAccessPoint DeleteAccessPoint DeleteAccessPointForObjectLambda DeleteAccessPointPolicy DeleteAccessPointPolicyForObjectLambda DeleteBucket DeleteBucketLifecycleConfiguration DeleteBucketPolicy DeleteBucketReplication DeleteBucketTagging DeleteJobTagging DeleteMultiRegionAccessPoint DeletePublicAccessBlock DeleteStorageLensConfiguration DeleteStorageLensConfigurationTagging DescribeJob DescribeMultiRegionAccessPointOperation GetAccessPoint GetAccessPointConfigurationForObjectLambda GetAccessPointForObjectLambda GetAccessPointPolicy GetAccessPointPolicyForObjectLambda GetAccessPointPolicyStatus GetAccessPointPolicyStatusForObjectLambda GetBucket GetBucketLifecycleConfiguration GetBucketPolicy GetBucketReplication GetBucketTagging GetBucketVersioning GetJobTagging GetMultiRegionAccessPoint GetMultiRegionAccessPointPolicy GetMultiRegionAccessPointPolicyStatus GetMultiRegionAccessPointRoutes GetPublicAccessBlock GetStorageLensConfiguration GetStorageLensConfigurationTagging ListAccessPoints ListAccessPointsForObjectLambda ListJobs ListMultiRegionAccessPoints ListRegionalBuckets ListStorageLensConfigurations PutAccessPointConfigurationForObjectLambda PutAccessPointPolicy PutAccessPointPolicyForObjectLambda PutBucketLifecycleConfiguration PutBucketPolicy PutBucketReplication PutBucketTagging PutBucketVersioning PutJobTagging PutMultiRegionAccessPointPolicy PutPublicAccessBlock PutStorageLensConfiguration PutStorageLensConfigurationTagging SubmitMultiRegionAccessPointRoutes UpdateJobPriority UpdateJobStatus".split(" ")
40 | corps = [corpus1, corpus2, corpus3, corpus4, corpus5, corpus6, corpus7]
41 | for corpus in corps:
42 | benchmark(corpus)
43 |
44 |
45 | if __name__ == "__main__":
46 | main()
--------------------------------------------------------------------------------
/my_utils/__init__.mojo:
--------------------------------------------------------------------------------
1 | from memory import memcmp
2 | from pathlib import Path
3 |
fn int_cmp(a: UInt32, b: UInt32) -> Int:
    """Compare two UInt32 values by subtraction.

    Both operands are converted to `Int` first and the result is their
    difference: negative when `a` is smaller, zero when equal, positive
    when `a` is larger.
    """
    var lhs = int(a)
    var rhs = int(b)
    return lhs - rhs
6 |
fn int_cmp64(a: UInt64, b: UInt64) -> Int:
    """Three-way comparison of two UInt64 values.

    Returns -1 when `a < b`, 0 when they are equal and 1 when `a > b`.

    The operands are compared directly as unsigned values. Converting them
    to the signed `Int` type and subtracting is not safe here: values with
    the top bit set do not fit in a signed 64-bit integer, so the difference
    can come out with the wrong sign and the ordering stops being consistent.
    """
    if a == b:
        return 0
    if a > b:
        return 1
    return -1
9 |
fn int_to_str(a: UInt32) -> String:
    """Return the string form that `str` produces for the UInt32 value `a`."""
    var rendered: String = str(a)
    return rendered
12 |
fn int_to_str64(a: UInt64) -> String:
    """Return the string form that `str` produces for the UInt64 value `a`."""
    var rendered: String = str(a)
    return rendered
15 |
fn cmp_strl(a: StringLiteral, b: StringLiteral) -> Int:
    """Compare two string literals by their raw bytes.

    The first `min(len(a), len(b))` bytes are compared with `memcmp`. If
    that prefix differs, the `memcmp` result is returned unchanged;
    otherwise the length difference `len(a) - len(b)` decides.
    """
    var len_a = len(a)
    var len_b = len(b)
    var prefix_cmp = memcmp(a.unsafe_ptr(), b.unsafe_ptr(), min(len_a, len_b))
    if prefix_cmp != 0:
        return prefix_cmp
    return len_a - len_b
23 |
fn cmp_str(a: String, b: String) -> Int:
    """Compare two strings by their raw bytes.

    The first `min(len(a), len(b))` bytes are compared with `memcmp`. If
    that prefix differs, the `memcmp` result is returned unchanged;
    otherwise the length difference `len(a) - len(b)` decides.
    """
    var len_a = len(a)
    var len_b = len(b)
    var prefix_cmp = memcmp(a.unsafe_ptr(), b.unsafe_ptr(), min(len_a, len_b))
    if prefix_cmp != 0:
        return prefix_cmp
    return len_a - len_b
31 |
fn stsl(a: StringLiteral) -> String:
    """Convert a string literal into a `String` value."""
    var converted: String = a
    return converted
34 |
fn corpus1() raises -> List[String]:
    """Return the Latin placeholder-text corpus split on single spaces.

    Punctuation stays attached to the neighbouring word because the text is
    split on the space character only.
    """
    var text = String('Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quisque orci urna, pretium et porta ac, porttitor sit amet sem. Fusce sagittis lorem neque, vitae sollicitudin elit suscipit et. In interdum convallis nisl in ornare. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Aliquam erat volutpat. Morbi mollis iaculis lectus ac tincidunt. Fusce nisi lacus, semper eu dignissim et, malesuada non mi. Sed euismod urna vel elit faucibus, eu bibendum ante fringilla. Curabitur tempus in turpis at mattis. Aliquam erat volutpat. Donec maximus elementum felis, sit amet dignissim augue tincidunt blandit. Aliquam fermentum, est eu mollis.')
    var words = text.split(" ")
    return words
37 |
fn corpus2() raises -> List[String]:
    """Returns the English "But I must explain" sample text split on single spaces.

    The embedded text repeats the same passage several times and is kept
    exactly as-is, including places where two sentences are joined without
    a separating space.

    Returns:
        The tokens obtained by splitting the embedded text on `" "`.

    Raises:
        Whatever `String.split` raises.
    """
    var text = String('But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure? On the other hand, we denounce with righteous indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee the pain and trouble that are bound to ensue; and equal blame belongs to those who fail in their duty through weakness of will, which is the same as saying through shrinking from toil and pain. These cases are perfectly simple and easy to distinguish. In a free hour, when our power of choice is untrammelled and when nothing prevents our being able to do what we like best, every pleasure is to be welcomed and every pain avoided. But in certain circumstances and owing to the claims of duty or the obligations of business it will frequently occur that pleasures have to be repudiated and annoyances accepted. The wise man therefore always holds in these matters to this principle of selection: he rejects pleasures to secure other greater pleasures, or else he endures pains to avoid worse pains. But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure? On the other hand, we denounce with righteous indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee the pain and trouble that are bound to ensue; and equal blame belongs to those who fail in their duty through weakness of will, which is the same as saying through shrinking from toil and pain. These cases are perfectly simple and easy to distinguish. In a free hour, when our power of choice is untrammelled and when nothing prevents our being able to do what we like best, every pleasure is to be welcomed and every pain avoided. But in certain circumstances and owing to the claims of duty or the obligations of business it will frequently occur that pleasures have to be repudiated and annoyances accepted. The wise man therefore always holds in these matters to this principle of selection: he rejects pleasures to secure other greater pleasures, or else he endures pains to avoid worse pains.But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure? On the other hand, we denounce with righteous indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee the pain and trouble that are bound to ensue; and equal blame belongs to those who fail in their duty through weakness of will, which is the same as saying through shrinking from toil and pain. These cases are perfectly simple and easy to distinguish. In a free hour, when our power of choice is untrammelled and when nothing prevents our being able to do what we like best, every pleasure is to be welcomed and every pain avoided. But in certain circumstances and owing to the claims of duty or the obligations of business it will frequently occur that pleasures have to be repudiated and annoyances accepted. The wise man therefore always holds in these matters to this principle of selection:')
    return text.split(" ")
40 |
fn corpus3() raises -> List[String]:
    """Returns the English "A wonderful serenity" sample text split on single spaces.

    The embedded text repeats the same passage several times and is kept
    exactly as-is, including places where two sentences are joined without
    a separating space.

    Returns:
        The tokens obtained by splitting the embedded text on `" "`.

    Raises:
        Whatever `String.split` raises.
    """
    var text = String('A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls like mine. I am so happy, my dear friend, so absorbed in the exquisite sense of mere tranquil existence, that I neglect my talents. I should be incapable of drawing a single stroke at the present moment; and yet I feel that I never was a greater artist than now. When, while the lovely valley teems with vapour around me, and the meridian sun strikes the upper surface of the impenetrable foliage of my trees, and but a few stray gleams steal into the inner sanctuary, I throw myself down among the tall grass by the trickling stream; and, as I lie close to the earth, a thousand unknown plants are noticed by me: when I hear the buzz of the little world among the stalks, and grow familiar with the countless indescribable forms of the insects and flies, then I feel the presence of the Almighty, who formed us in his own image, and the breath of that universal love which bears and sustains us, as it floats around us in an eternity of bliss; and then, my friend, when darkness overspreads my eyes, and heaven and earth seem to dwell in my soul and absorb its power, like the form of a beloved mistress, then I often think with longing, Oh, would I could describe these conceptions, could impress upon paper all that is living so full and warm within me, that it might be the mirror of my soul, as my soul is the mirror of the infinite God! O my friend -- but it is too much for my strength -- I sink under the weight of the splendour of these visions! A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls like mine. I am so happy, my dear friend, so absorbed in the exquisite sense of mere tranquil existence, that I neglect my talents. I should be incapable of drawing a single stroke at the present moment; and yet I feel that I never was a greater artist than now. When, while the lovely valley teems with vapour around me, and the meridian sun strikes the upper surface of the impenetrable foliage of my trees, and but a few stray gleams steal into the inner sanctuary, I throw myself down among the tall grass by the trickling stream; and, as I lie close to the earth, a thousand unknown plants are noticed by me: when I hear the buzz of the little world among the stalks, and grow familiar with the countless indescribable forms of the insects and flies, then I feel the presence of the Almighty, who formed us in his own image, and the breath of that universal love which bears and sustains us, as it floats around us in an eternity of bliss; and then, my friend, when darkness overspreads my eyes, and heaven and earth seem to dwell in my soul and absorb its power, like the form of a beloved mistress, then I often think with longing, Oh, would I could describe these conceptions, could impress upon paper all that is living so full and warm within me, that it might be the mirror of my soul, as my soul is the mirror of the infinite God! O my friend -- but it is too much for my strength -- I sink under the weight of the splendour of these visions! A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls like mine. I am so happy, my dear friend, so absorbed in the exquisite sense of mere tranquil existence, that I neglect my talents. I should be incapable of drawing a single stroke at the present moment; and yet I feel that I never was a greater artist than now. When, while the lovely valley teems with vapour around me, and the meridian sun strikes the upper surface of the impenetrable foliage of my trees, and but a few stray gleams steal into the inner sanctuary, I throw myself down among the tall grass by the trickling stream; and, as I lie close to the earth, a thousand unknown plants are noticed by me: when I hear the buzz of the little world among the stalks, and grow familiar with the countless indescribable forms of the insects and flies, then I feel the presence of the Almighty, who formed us in his own image, and the breath of that universal love which bears and sustains us, as it floats around us in an eternity of bliss; and then, my friend, when darkness overspreads my eyes, and heaven and earth seem to dwell in my soul and absorb its power, like the form of a beloved mistress, then I often think with longing, Oh, would I could describe these conceptions, could impress upon paper all that is living so full and warm within me, that it might be the mirror of my soul, as my soul is the mirror of the infinite God! O my friend -- but it is too much for my strength -- I sink under the weight of the splendour of these visions!A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls')
    return text.split(" ")
43 |
fn corpus4() raises -> List[String]:
    """Returns a Russian (Cyrillic) sample text split on single spaces.

    The embedded text is kept exactly as-is, including the place where the
    passage restarts without a separating space.

    Returns:
        The tokens obtained by splitting the embedded text on `" "`.

    Raises:
        Whatever `String.split` raises.
    """
    var text = String('Проснувшись однажды утром после беспокойного сна, Грегор Замза обнаружил, что он у себя в постели превратился в страшное насекомое. Лежа на панцирнотвердой спине, он видел, стоило ему приподнять голову, свой коричневый, выпуклый, разделенный дугообразными чешуйками живот, на верхушке которого еле держалось готовое вот-вот окончательно сползти одеяло. Его многочисленные, убого тонкие по сравнению с остальным телом ножки беспомощно копошились у него перед глазами. «Что со мной случилось?» – подумал он. Это не было сном. Его комната, настоящая, разве что слишком маленькая, но обычная комната, мирно покоилась в своих четырех хорошо знакомых стенах. Над столом, где были разложены распакованные образцы сукон – Замза был коммивояжером, – висел портрет, который он недавно вырезал из иллюстрированного журнала и вставил в красивую золоченую рамку. На портрете была изображена дама в меховой шляпе и боа, она сидела очень прямо и протягивала зрителю тяжелую меховую муфту, в которой целиком исчезала ее рука. Затем взгляд Грегора устремился в окно, и пасмурная погода – слышно было, как по жести подоконника стучат капли дождя – привела его и вовсе в грустное настроение. «Хорошо бы еще немного поспать и забыть всю эту чепуху», – подумал он, но это было совершенно неосуществимо, он привык спать на правом боку, а в теперешнем своем состоянии он никак не мог принять этого положения. С какой бы силой ни поворачивался он на правый бок, он неизменно сваливался опять на спину. Закрыв глаза, чтобы не видеть своих барахтающихся ног, он проделал это добрую сотню раз и отказался от этих попыток только тогда, когда почувствовал какую-то неведомую дотоле, тупую и слабую боль в боку. «Ах ты, господи, – подумал он, – какую я выбрал хлопотную профессию! Изо дня в день в разъездах. Деловых волнений куда больше, чем на месте, в торговом доме, а кроме того, изволь терпеть тяготы дороги, думай о расписании поездов, мирись с плохим, нерегулярным питанием, завязывай со все новыми и новыми людьми недолгие, никогда не бывающие сердечными отношения. Черт бы побрал все это!» Он почувствовал вверху живота легкий зуд; медленно подвинулся на спине к прутьям кровати, чтобы удобнее было поднять голову; нашел зудевшее место, сплошь покрытое, как оказалось, белыми непонятными точечками; хотел было ощупать это место одной из ножек, но сразу отдернул ее, ибо даже простое прикосновение вызвало у него, Грегора, озноб. Он соскользнул в прежнее свое положение. «От этого раннего вставания, – подумал он, – можно совсем обезуметь. Человек должен высыпаться. Другие коммивояжеры живут, как одалиски. Когда я, например, среди дня возвращаюсь в гостиницу, чтобы переписать полученные заказы, эти господа только завтракают. А осмелься я вести себя так, мои хозяин выгнал бы меня сразу. Кто знает, впрочем, может быть, это было бы даже очень хорошо для меня. Если бы я не сдерживался ради родителей, я бы давно заявил об уходе, я бы подошел к своему хозяину и выложил ему все, что о нем думаю. Он бы так и свалился с конторки! Странная у него манера – садиться на конторку и с ее высоты разговаривать со служащим, который вдобавок вынужден подойти вплотную к конторке из-за того, что хозяин туг на ухо. Однако надежда еще не совсем потеряна: как только я накоплю денег, чтобы выплатить долг моих родителей – на это уйдет еще лет пять-шесть, – я так и поступлю. Тут-то мы и распрощаемся раз и навсегда. А пока что надо подниматься, мой поезд отходит в пять». И он взглянул на будильник, который тикал на сундуке. «Боже правый!» – подумал он. Было половина седьмого, и стрелки спокойно двигались дальше, было даже больше половины, без малого уже три четверти. Неужели будильник не звонил? С кровати было видно, что он поставлен правильно, на четыре часа; и он, несомненно, звонил. Но как можно было спокойно спать под этот сотрясающий мебель трезвон? Ну, спал-то он неспокойно, но, видимо, крепко. Однако что делать теперь? Следующий поезд уходит в семь часов; чтобы поспеть на него, он должен отчаянно торопиться, а набор образцов еще не упакован, да и сам он отнюдь не чувствует себя свежим и легким на подъем. И даже поспей он на поезд, хозяйского разноса ему все равно не избежать – ведь рассыльный торгового дома дежурил у пятичасового поезда и давно доложил о его, Грегора, опоздании. Рассыльный, человек бесхарактерный и неумный, был ставленником хозяина. А что, если сказаться больным? Но это было бы крайне неприятно и показалось бы подозрительным, ибо за пятилетнюю свою службу Грегор ни разу еще не болел. Хозяин, конечно, привел бы врача больничной кассы и стал попрекать родителей сыном-лентяем, отводя любые возражения ссылкой на этого врача, по мнению которого все люди на свете совершенно здоровы и только не любят работать. И разве в данном случае он был бы так уж неправ? Если не считать сонливости, действительно странной после такого долгого сна, Грегор и в самом деле чувствовал себя превосходно и был даже чертовски голоден.Проснувшись однажды утром после беспокойного сна, Грегор Замза обнаружил, что он у себя в постели превратился в страшное насекомое. Лежа на панцирнотвердой спине, он видел, стоило ему приподнять голову, свой коричневый, выпуклый, разделенный дугообразными чешуйками живот, на верхушке которого еле держалось готовое вот-вот окончательно сползти одеяло. Его многочисленные, убого тонкие по сравнению с остальным телом ножки беспомощно копошились у него перед глазами. «Что со мной случилось?» – подумал он. Это не было сном. Его комната, настоящая, разве что слишком маленькая, но обычная комната, мирно покоилась в своих четырех хорошо знакомых стенах. Над столом, где были разложены распакованные образцы сукон – Замза был коммивояжером, – висел портрет, который он недавно вырезал из иллюстрированного журнала и вставил в красивую золоченую рамку. На портрете была изображена дама в меховой шляпе и боа, она сидела очень прямо и протягивала зрителю тяжелую меховую муфту, в которой целиком исчезала ее рука. Затем взгляд Грегора устремился в окно, и пасмурная погода – слышно было, как по жести подоконника стучат капли дождя – привела его и вовсе в грустное настроение. «Хорошо бы еще немного поспать и забыть всю эту чепуху», – подумал он, но это было совершенно неосуществимо, он привык спать на правом боку, а в теперешнем своем состоянии он никак не мог принять этого положения. С какой бы силой ни поворачивался он на правый бок, он неизменно сваливался опять на спину.')
    return text.split(" ")
46 |
fn corpus5() raises -> List[String]:
    """Returns the German "Weit hinten, hinter den Wortbergen" sample text split on single spaces.

    The embedded text repeats the same passage several times and is kept
    exactly as-is, including the place where two sentences are joined
    without a separating space.

    Returns:
        The tokens obtained by splitting the embedded text on `" "`.

    Raises:
        Whatever `String.split` raises.
    """
    var text = String('Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen letzten Blick zurück auf die Skyline seiner Heimatstadt Buchstabhausen, die Headline von Alphabetdorf und die Subline seiner eigenen Straße, der Zeilengasse. Wehmütig lief ihm eine rhetorische Frage über die Wange, dann setzte es seinen Weg fort. Unterwegs traf es eine Copy. Die Copy warnte das Blindtextchen, da, wo sie herkäme wäre sie zigmal umgeschrieben worden und alles, was von ihrem Ursprung noch übrig wäre, sei das Wort "und" und das Blindtextchen solle umkehren und wieder in sein eigenes, sicheres Land zurückkehren. Doch alles Gutzureden konnte es nicht überzeugen und so dauerte es nicht lange, bis ihm ein paar heimtückische Werbetexter auflauerten, es mit Longe und Parole betrunken machten und es dann in ihre Agentur schleppten, wo sie es für ihre Projekte wieder und wieder mißbrauchten. Und wenn es nicht umgeschrieben wurde, dann benutzen Sie es immernoch. Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen letzten Blick zurück auf die Skyline seiner Heimatstadt Buchstabhausen, die Headline von Alphabetdorf und die Subline seiner eigenen Straße, der Zeilengasse. Wehmütig lief ihm eine rhetorische Frage über die Wange, dann setzte es seinen Weg fort. Unterwegs traf es eine Copy. Die Copy warnte das Blindtextchen, da, wo sie herkäme wäre sie zigmal umgeschrieben worden und alles, was von ihrem Ursprung noch übrig wäre, sei das Wort "und" und das Blindtextchen solle umkehren und wieder in sein eigenes, sicheres Land zurückkehren. Doch alles Gutzureden konnte es nicht überzeugen und so dauerte es nicht lange, bis ihm ein paar heimtückische Werbetexter auflauerten, es mit Longe und Parole betrunken machten und es dann in ihre Agentur schleppten, wo sie es für ihre Projekte wieder und wieder mißbrauchten. Und wenn es nicht umgeschrieben wurde, dann benutzen Sie es immernoch. Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen letzten Blick zurück auf die Skyline seiner Heimatstadt Buchstabhausen, die Headline von Alphabetdorf und die Subline seiner eigenen Straße, der Zeilengasse. Wehmütig lief ihm eine rhetorische Frage über die Wange, dann setzte es seinen Weg fort. Unterwegs traf es eine Copy. Die Copy warnte das Blindtextchen, da, wo sie herkäme wäre sie zigmal umgeschrieben worden und alles, was von ihrem Ursprung noch übrig wäre, sei das Wort "und" und das Blindtextchen solle umkehren und wieder in sein eigenes, sicheres Land zurückkehren. Doch alles Gutzureden konnte es nicht überzeugen und so dauerte es nicht lange, bis ihm ein paar heimtückische Werbetexter auflauerten, es mit Longe und Parole betrunken machten und es dann in ihre Agentur schleppten, wo sie es für ihre Projekte wieder und wieder mißbrauchten. Und wenn es nicht umgeschrieben wurde, dann benutzen Sie es immernoch.Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen')
    return text.split(" ")
49 |
fn corpus6() raises -> List[String]:
    """Returns a Japanese-script sample text split on single ASCII spaces.

    Returns:
        The tokens obtained by splitting the embedded text on `" "`.

    Raises:
        Whatever `String.split` raises.
    """
    var text = String('米くを舵4物委らご氏松ハナテフ月関ソ時平ふいの博情れじフ牟万い元56園フメヤオ試図ロツヤ未備王こと傷喫羅踊んゆし。栃ユヱオ書著作ユソツロ英祉業ア大課ご権質フべ空8午キ切軟づン著郎そゃす格町採ヱオマコ処8付国ムハチア究表でなだ際無ロミヱ地兵ぴげ庭体すク発抜爆位や。楽富むゆず盛航カナセ携代ハ本高きた員59今骸ンラえぜ城解イケ穴訴ぽぎ属住ヤケトヌ抱点ト広注厚でて。 国リ出難セユメ軍手ヘカウ画形サヲシ猛85用ヲキミ心死よしと身処ケヨミオ教主ーぽ事業んく字国たさょ図能シミスヤ社8板ル岡世58次戒知院んれり。市メ誘根カ数問禁竹ゃれえみ給辺のでみき今二ぎさ裕止過こクすと無32郎所ラた生展ヌヘス成度慣葬勇厘ばてか。室ゃ下携疲ム色権がぽりっ銃週ノオ姫千テム健蔵い研手ッ放容ル告属め旅側26企サノヨ宅都福ぞ通待ちぴね種脳イど労希望義通むン。 罰しい続負せ著低たル異師ユハワ東添質コチ転集ルヤ雇聴約ヒ前統らた情厳ゆさでや真胸や有披暑棚豆ゆぼたけ。盛ワセロナ情競クるっわ講3音ずをせ少地めしぜょ手63明視れに判企ヒヌエソ求総58特本ね井比ユラキ禁頭馬るゅリす能率率かがさわ。葉サソ医郡ヱヘソ労帰ナケスミ救写ワヘ株審ネヒニミ安逮イ人画ラ涯車はラ極騒りなド件5級ンかふー劇41著ぱぐ凱討だ文世ぶづどま界善魅マ渓経競融れがや。 連ーぜらご模分ッ視外ばフく運発群ほぼづ育越一ほごクけ案募ヲイソ治会イせフ製君ぜた漢村1変リヒ構5際ツ御文ヲ臭入さドぼ代書ハケ引技ろみれ回観注倉徹ぱ。論ラづ海要サ情座ゃり齢宣ラモエ芸化エマホ覧催回ら戦69本外ト葬岳な政画か連針ぴリフず。約ル闘辺ぽ経2応掲ホサアラ塾小コラ画決クノオ上室レヌヱ勝逮ぜるえむ責豊チノ明意ひけ訟6碁草メタチエ財午召喝塊む。 決めでわ名金つけレわ続人県約ぽぼす尾腹ユサ戦載リシ護賀レモフツ重涯ニ治者むんっみ職更カタチレ提話2何ワ責東まけげふ能政ヌ供禁がびてわ提改倶れめ。読み担後ぽ安加ぎ論鹿ツ統最お気麻月つじもあ竜思いろめ判必満理トコ文連ムイウハ寄串ざほびー。文ゆこっ向27年メイ便能ノセヲ待1王スねたゆ伝派んね点過カト治読よにきべ使人スシ都言え阻8割べづえみ注引敷的岳犠眠どそ。 学用イだ医客開ロ供界もぞだ実隆モイヌ務坂ナコヲ権野ろづ初場ぱ低会づぱじ新倒コ化政レ止奮浸猪ッわえづ。形いやリ要帰ほまむだ業領スル必打さ島14巻リ集日ネヘホタ面幅ち写上そぴ円図ムタコモ報使イわざと会催ヤヲ康証をドぶレ盤岡ホハツ作29管しをめ公問懐蓄っさ。来ゆぼあぱ投秋シ語右ぐ身靖かば辛握捕家記ヘワ神岐囲づ毘観メテクツ政73夕罪57需93誌飲査仁さ。 変レめ束球よんま会特ヱコ聞重だ史純ーどる件32浦レぴよゃ上強ネラリロ査従セユヤ専棋光レ作表ひぶ予正ぜーな誉確フス函6報円ス進治ね能営済否雄でわょ。42生型ば着続ア短実ぎおめび前環闘ラヤヲル診均っとにの声公トヱテマ整試椅情久妊舌頃ざとっく。品キチトテ阿国ラら受87世ヲフセリ川86個ーょぼげ危子ヘレカメ無会ぱかへ事通んかて電条ロツ徴商ぶぞそを居暑メ害広せもがり禁応レミヲ応響割壮憶はぱ。 千れンが織財メニ況界ネトレミ学豊フオホシ近月レたやご的罪ょな菱技ちる警栗エセ提89林危氷48参ア説森クキヒヱ薬社ホコエリ負和ルび紀下ケミイ掲歳特ごず扱底ク護木連ちクを各形ばすか。変ぱなれ町7融ヌ街準以タユヘム質裕ぶで遺語俊ぎずょ事金文キ写多山ーゆに歩帯すで会世クぜよ論写ヲ達71林危氷5間続ぎぜび高怠す。 係8青け応著ミ戦条ナヘネカ思79未ぎ算伊をゃ泉人ーづ需説っ畑鹿27軽ラソツ権2促千護ルロナカ開国ケ暴嶋ご池表だ。佐フナ訪麻はてせば勝効をあ医戦画とさわぴ者両すいあ並来んば載食ぴ件友頂業へえぞ魚祝ネラ聞率スコリケ始全ンこび夫出ドふ今布うぎふゅ実克即哉循やしんな。 暮す備54依紀てッん末刊と柔称むてス無府ケイ変壌をぱ汁連フマス海世ヌ中負知問ナヘケ純推ひ読着ヒ言若私軽れ。掲けフむ王本オコ線人をっさ必和断セソヲハ図芸ちかな防長りぶは投新意相ツ並5余セ職岳ぞ端古空援そ。森ヨエチ題5東っ自兄ち暴5近鹿横ト的京ハ安氷ナキ深際ぎ並節くスむの権工ほルせ京49効タムチ処三ぞぴラ済国ずっ文経ヘトミ水分準そが。')
    return text.split(" ")
52 |
fn corpus7() raises -> List[String]:
    """Returns a list of S3-style API operation names.

    The embedded text is a space-separated list of identifiers. The two
    halves of the list are joined by a single space; a leftover `", "`
    between `WriteGetObjectResponse` and `CreateAccessPoint` has been
    removed so that no token carries a stray quote or comma. The number of
    tokens is unchanged.

    Returns:
        The tokens obtained by splitting the embedded text on `" "`.

    Raises:
        Whatever `String.split` raises.
    """
    var text = String('AbortMultipartUpload CompleteMultipartUpload CopyObject CreateBucket CreateMultipartUpload DeleteBucket DeleteBucketAnalyticsConfiguration DeleteBucketCors DeleteBucketEncryption DeleteBucketIntelligentTieringConfiguration DeleteBucketInventoryConfiguration DeleteBucketLifecycle DeleteBucketMetricsConfiguration DeleteBucketOwnershipControls DeleteBucketPolicy DeleteBucketReplication DeleteBucketTagging DeleteBucketWebsite DeleteObject DeleteObjects DeleteObjectTagging DeletePublicAccessBlock GetBucketAccelerateConfiguration GetBucketAcl GetBucketAnalyticsConfiguration GetBucketCors GetBucketEncryption GetBucketIntelligentTieringConfiguration GetBucketInventoryConfiguration GetBucketLifecycle GetBucketLifecycleConfiguration GetBucketLocation GetBucketLogging GetBucketMetricsConfiguration GetBucketNotification GetBucketNotificationConfiguration GetBucketOwnershipControls GetBucketPolicy GetBucketPolicyStatus GetBucketReplication GetBucketRequestPayment GetBucketTagging GetBucketVersioning GetBucketWebsite GetObject GetObjectAcl GetObjectAttributes GetObjectLegalHold GetObjectLockConfiguration GetObjectRetention GetObjectTagging GetObjectTorrent GetPublicAccessBlock HeadBucket HeadObject ListBucketAnalyticsConfigurations ListBucketIntelligentTieringConfigurations ListBucketInventoryConfigurations ListBucketMetricsConfigurations ListBuckets ListMultipartUploads ListObjects ListObjectsV2 ListObjectVersions ListParts PutBucketAccelerateConfiguration PutBucketAcl PutBucketAnalyticsConfiguration PutBucketCors PutBucketEncryption PutBucketIntelligentTieringConfiguration PutBucketInventoryConfiguration PutBucketLifecycle PutBucketLifecycleConfiguration PutBucketLogging PutBucketMetricsConfiguration PutBucketNotification PutBucketNotificationConfiguration PutBucketOwnershipControls PutBucketPolicy PutBucketReplication PutBucketRequestPayment PutBucketTagging PutBucketVersioning PutBucketWebsite PutObject PutObjectAcl PutObjectLegalHold PutObjectLockConfiguration PutObjectRetention PutObjectTagging PutPublicAccessBlock RestoreObject SelectObjectContent UploadPart UploadPartCopy WriteGetObjectResponse CreateAccessPoint CreateAccessPointForObjectLambda CreateBucket CreateJob CreateMultiRegionAccessPoint DeleteAccessPoint DeleteAccessPointForObjectLambda DeleteAccessPointPolicy DeleteAccessPointPolicyForObjectLambda DeleteBucket DeleteBucketLifecycleConfiguration DeleteBucketPolicy DeleteBucketReplication DeleteBucketTagging DeleteJobTagging DeleteMultiRegionAccessPoint DeletePublicAccessBlock DeleteStorageLensConfiguration DeleteStorageLensConfigurationTagging DescribeJob DescribeMultiRegionAccessPointOperation GetAccessPoint GetAccessPointConfigurationForObjectLambda GetAccessPointForObjectLambda GetAccessPointPolicy GetAccessPointPolicyForObjectLambda GetAccessPointPolicyStatus GetAccessPointPolicyStatusForObjectLambda GetBucket GetBucketLifecycleConfiguration GetBucketPolicy GetBucketReplication GetBucketTagging GetBucketVersioning GetJobTagging GetMultiRegionAccessPoint GetMultiRegionAccessPointPolicy GetMultiRegionAccessPointPolicyStatus GetMultiRegionAccessPointRoutes GetPublicAccessBlock GetStorageLensConfiguration GetStorageLensConfigurationTagging ListAccessPoints ListAccessPointsForObjectLambda ListJobs ListMultiRegionAccessPoints ListRegionalBuckets ListStorageLensConfigurations PutAccessPointConfigurationForObjectLambda PutAccessPointPolicy PutAccessPointPolicyForObjectLambda PutBucketLifecycleConfiguration PutBucketPolicy PutBucketReplication PutBucketTagging PutBucketVersioning PutJobTagging PutMultiRegionAccessPointPolicy PutPublicAccessBlock PutStorageLensConfiguration PutStorageLensConfigurationTagging SubmitMultiRegionAccessPointRoutes UpdateJobPriority UpdateJobStatus')
    return text.split(" ")
55 |
fn corpus8() raises -> List[String]:
    """Loads the system word list, one entry per line.

    Reads the file at `/usr/share/dict/words` and splits its contents into
    lines.

    Returns:
        The lines of the file as produced by `splitlines`.

    Raises:
        Whatever `Path.read_text` raises, e.g. when the file cannot be read.
    """
    var words_file = Path("/usr/share/dict/words")
    var contents = words_file.read_text()
    return contents.splitlines()
--------------------------------------------------------------------------------