├── sha ├── sha256_2.mojo ├── __init__.mojo └── sha256.mojo ├── md5 ├── __init__.mojo └── md5.mojo ├── o1hash ├── __init__.mojo └── o1hash.mojo ├── wyhasher ├── __init__.mojo └── wyhasher.mojo ├── ahasher ├── __init__.mojo └── ahasher.mojo ├── fnv1a ├── __init__.mojo └── fnv1a.mojo ├── fxhash ├── __init__.mojo └── fxhash.mojo ├── benchmark_other_languages ├── js │ └── hash_functions │ │ ├── .gitignore │ │ ├── README.md │ │ ├── package.json │ │ └── package-lock.json ├── go │ └── hash_functions │ │ └── go.mod ├── c │ └── hash_functions │ │ ├── benchmark │ │ ├── Makefile │ │ ├── xxhash.c │ │ ├── xxh3.h │ │ ├── benchmark_wyhash.cpp │ │ └── wyhash.h ├── swift │ └── hash_functions │ │ ├── .gitignore │ │ ├── README.md │ │ ├── .swiftpm │ │ └── xcode │ │ │ ├── package.xcworkspace │ │ │ └── xcshareddata │ │ │ │ └── IDEWorkspaceChecks.plist │ │ │ └── xcshareddata │ │ │ └── xcschemes │ │ │ └── hash_functions.xcscheme │ │ └── Package.swift ├── rust │ └── hash_functions │ │ ├── Cargo.toml │ │ └── Cargo.lock └── python │ └── hash_functions │ ├── md5_benchmark.py │ ├── benchmark_dict.py │ └── benchmark.py ├── images ├── hash_map.png └── hash_functions.png ├── HashFunctionsAndWhereToFindThem.pdf ├── LICENSE ├── checkout_remote_modules.sh ├── test_sha256.mojo ├── test_md5.mojo ├── benchmark_hash_words_file.mojo ├── .gitignore ├── benchmark_hash_functions.mojo ├── test_sha256_2.mojo ├── README.md └── my_utils └── __init__.mojo /sha/sha256_2.mojo: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /md5/__init__.mojo: -------------------------------------------------------------------------------- 1 | from .md5 import md5_string -------------------------------------------------------------------------------- /o1hash/__init__.mojo: -------------------------------------------------------------------------------- 1 | from .o1hash import o1_hash 
-------------------------------------------------------------------------------- /sha/__init__.mojo: -------------------------------------------------------------------------------- 1 | from .sha256 import sha256_encode -------------------------------------------------------------------------------- /wyhasher/__init__.mojo: -------------------------------------------------------------------------------- 1 | from .wyhasher import wyhash -------------------------------------------------------------------------------- /ahasher/__init__.mojo: -------------------------------------------------------------------------------- 1 | from .ahasher import ahash, AHasher -------------------------------------------------------------------------------- /fnv1a/__init__.mojo: -------------------------------------------------------------------------------- 1 | from .fnv1a import fnv1a32, fnv1a64 -------------------------------------------------------------------------------- /fxhash/__init__.mojo: -------------------------------------------------------------------------------- 1 | from .fxhash import fxhash32, fxhash64 2 | -------------------------------------------------------------------------------- /benchmark_other_languages/js/hash_functions/.gitignore: -------------------------------------------------------------------------------- 1 | /node_modules -------------------------------------------------------------------------------- /images/hash_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzaks/mojo-hash/HEAD/images/hash_map.png -------------------------------------------------------------------------------- /benchmark_other_languages/go/hash_functions/go.mod: -------------------------------------------------------------------------------- 1 | module hash_functions 2 | 3 | go 1.21.5 4 | -------------------------------------------------------------------------------- /images/hash_functions.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzaks/mojo-hash/HEAD/images/hash_functions.png -------------------------------------------------------------------------------- /HashFunctionsAndWhereToFindThem.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzaks/mojo-hash/HEAD/HashFunctionsAndWhereToFindThem.pdf -------------------------------------------------------------------------------- /benchmark_other_languages/js/hash_functions/README.md: -------------------------------------------------------------------------------- 1 | First execute `npm install` to fetch the dependencies and then execute `node benchmark.js`. 2 | -------------------------------------------------------------------------------- /benchmark_other_languages/c/hash_functions/benchmark: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mzaks/mojo-hash/HEAD/benchmark_other_languages/c/hash_functions/benchmark -------------------------------------------------------------------------------- /benchmark_other_languages/js/hash_functions/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "wyhash": "1.0.0", 4 | "xxhashjs": "0.2.2" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /benchmark_other_languages/swift/hash_functions/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | /.build 3 | /Packages 4 | /*.xcodeproj 5 | xcuserdata/ 6 | DerivedData/ 7 | .swiftpm/config/registries.json 8 | .swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata 9 | .netrc 10 | -------------------------------------------------------------------------------- /benchmark_other_languages/swift/hash_functions/README.md: 
if __name__ == "__main__":
    # Read the system word list as text and encode it once, so both digests
    # are computed over exactly the same bytes. The context manager closes
    # the file handle as soon as the content is in memory.
    with open("/usr/share/dict/words", "r") as file:
        content = file.read().encode()

    # Time each digest separately. perf_counter_ns() is monotonic, so the
    # measured interval cannot be distorted by wall-clock adjustments.
    for digest_factory in (hashlib.md5, hashlib.sha256):
        tik = time.perf_counter_ns()
        result = digest_factory(content)
        tok = time.perf_counter_ns()
        print(result.hexdigest())
        print(f"In: {tok - tik}")
fn o1_hash(s: String) -> UInt64:
    """Constant-time sampling hash of `s` (o1hash).

    Only a fixed number of bytes is read regardless of the string length:
    for inputs of 4 or more bytes the first, middle and last 32-bit words,
    for inputs of 1 to 3 bytes the first, middle and last byte.

    Args:
        s: The string whose bytes are sampled.

    Returns:
        The 64-bit hash value; 0 for an empty string.
    """
    var p = s.unsafe_ptr()
    var bytes = s.byte_length()
    if bytes >= 4:
        var first = p.bitcast[DType.uint32]()[0]
        var middle = p.offset((bytes >> 1) - 2).bitcast[DType.uint32]()[0]
        var last = p.offset(bytes - 4).bitcast[DType.uint32]()[0]
        # The sum wraps in 32 bits, but the product must be computed in
        # 64 bits; multiplying before widening would discard the upper half
        # of the result and leave only 32 bits of hash.
        return (first + last).cast[DType.uint64]() * middle.cast[DType.uint64]()
    if bytes:
        var tail = (
            (p[0].cast[DType.uint64]() << 16)
            | (p[bytes >> 1].cast[DType.uint64]() << 8)
            | p[bytes - 1].cast[DType.uint64]()
        )
        # Full 64-bit multiplier used by the reference o1hash implementation.
        return tail * UInt64(0xa0761d6478bd642f)
    return 0
# wyhash bench makefile
#
# Builds one benchmark binary per hash configuration from the same source
# file; the configuration is selected with preprocessor defines.

CXX = g++
CXXFLAGS = -std=c++11 -O2 -s -Wall -march=native

# Benchmark driver shipped in this directory.
SRC = benchmark_wyhash.cpp

TARGETS = wyhash0 wyhash1 wyhash2 xxh3scalar xxh3sse2 xxh3avx2

# Neither goal produces a file of the same name.
.PHONY: all clean

all: $(TARGETS)

wyhash0: $(SRC) wyhash.h
	$(CXX) $< -o $@ $(CXXFLAGS) -DWYHASH_CONDOM=0

wyhash1: $(SRC) wyhash.h
	$(CXX) $< -o $@ $(CXXFLAGS) -DWYHASH_CONDOM=1

wyhash2: $(SRC) wyhash.h
	$(CXX) $< -o $@ $(CXXFLAGS) -DWYHASH_CONDOM=2

# NOTE(review): xxh3.h includes "xxhash.h", which must be available on the
# include path for the xxh3 targets to compile.
xxh3scalar: $(SRC) xxh3.h
	$(CXX) $< -o $@ $(CXXFLAGS) -DXXH_VECTOR=0 -DXXH3

xxh3sse2: $(SRC) xxh3.h
	$(CXX) $< -o $@ $(CXXFLAGS) -DXXH_VECTOR=1 -DXXH3

xxh3avx2: $(SRC) xxh3.h
	$(CXX) $< -o $@ $(CXXFLAGS) -DXXH_VECTOR=2 -DXXH3

# -f keeps `make clean` from failing when some targets were never built.
clean:
	rm -f $(TARGETS)
#!/bin/bash

# Fetch selected directories from a remote git repository and copy each one
# into the directory two levels above the clone as a local module.
#
# Usage: check_out_remote_module <repo-url> <path>[=<module_name>] ...
#   <path>         directory inside the repository to check out
#   <module_name>  name of the local copy; defaults to <path>
#
# The body runs in a subshell, so `cd` and `exit` do not affect the caller;
# a non-zero return status means the module could not be fetched.
function check_out_remote_module() (
    rurl="$1"
    shift
    declare -a paths
    declare -a module_names
    for var in "$@"
    do
        # Split "path=module_name"; IFS is changed for this `read` only.
        IFS="=" read -ra module_name_components <<< "$var"
        components_count=${#module_name_components[@]}
        if [ "$components_count" -eq 0 ]; then
            echo "Skipping empty module specification" >&2
            continue
        fi
        path=${module_name_components[0]}
        module_name=${module_name_components[$components_count-1]}
        # An empty name would turn the removal below into `rm -rf ../../`.
        if [ -z "$path" ] || [ -z "$module_name" ]; then
            echo "Skipping invalid module specification: $var" >&2
            continue
        fi
        paths+=("$path")
        module_names+=("$module_name")
    done

    current_date_time=$(date)
    echo "URL: $rurl"
    git clone -n --depth=1 --filter=tree:0 "$rurl" || exit 1

    # git names the clone directory after the last URL component, without a
    # trailing slash or ".git" suffix.
    repo_dir=${rurl%/}
    repo_dir=${repo_dir##*/}
    repo_dir=${repo_dir%.git}
    cd "$repo_dir" || exit 1
    git sparse-checkout set --no-cone "${paths[@]}" || exit 1
    git checkout || exit 1

    for i in "${!paths[@]}"
    do
        module_name=${module_names[$i]}
        path=${paths[$i]}
        # Replace the previous copy only once the new one is available, so a
        # failed download does not leave the project without the module.
        rm -rf "../../$module_name"
        cp -R "./$path" "../../$module_name" || exit 1
        {
            echo "$current_date_time"
            echo "URL: $rurl"
            echo "Path: $path"
        } > "../../$module_name/.checkoutinfo"
    done
)

function checkout()(
    # Add check out remote module calls here

    # check_out_remote_module "https://github.com/mzaks/mojo-trees" "fiby_tree"

    # No-op so the function body is valid bash while every call is commented out.
    :
)

mkdir -p "_deps"
# Stop here if the scratch directory is unusable; otherwise the cleanup
# below would remove a "_deps" directory relative to the wrong location.
cd "_deps" || exit 1

checkout
status=$?

rm -rf "../_deps"
exit "$status"
@always_inline
fn _hash_word32(value: UInt32, word: UInt32) -> UInt32:
    """Fold one 32-bit `word` into the running 32-bit hash state `value`.

    The state is rotated left by `ROTATE` bits, XORed with the word and
    multiplied by `SEED32`.
    """
    var rotated = rotate_bits_left[ROTATE](value)
    var mixed = rotated ^ word
    return mixed * SEED32

@always_inline
fn _hash_word64(value: UInt64, word: UInt64) -> UInt64:
    """Fold one 64-bit `word` into the running 64-bit hash state `value`.

    The state is rotated left by `ROTATE` bits, XORed with the word and
    multiplied by `SEED64`.
    """
    var rotated = rotate_bits_left[ROTATE](value)
    var mixed = rotated ^ word
    return mixed * SEED64
IN NO EVENT SHALL THE COPYRIGHT 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | * 30 | * You can contact the author at: 31 | * - xxHash homepage: https://www.xxhash.com 32 | * - xxHash source repository: https://github.com/Cyan4973/xxHash 33 | */ 34 | 35 | /* 36 | * xxhash.c instantiates functions defined in xxhash.h 37 | */ 38 | 39 | #define XXH_STATIC_LINKING_ONLY /* access advanced declarations */ 40 | #define XXH_IMPLEMENTATION /* access definitions */ 41 | 42 | #include "xxhash.h" -------------------------------------------------------------------------------- /test_sha256.mojo: -------------------------------------------------------------------------------- 1 | # import time 2 | # from sha import sha256_encode 3 | # from testing import assert_equal 4 | # from collections.vector import InlinedFixedVector 5 | 6 | # fn print_hex(digest: InlinedFixedVector[UInt8, 32]): 7 | # var lookup = String("0123456789abcdef") 8 | # var result: String = "" 9 | # for i in range(len(digest)): 10 | # var v = digest[i].to_int() 11 | # result += lookup[(v >> 4)] 12 | # result += lookup[v & 15] 13 | 14 | # print(result) 15 | # print(len(digest)) 16 | # print(len("b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9")) 17 | # print(len("985752100505598575751521005110148569753501015350100551009755100979810297995256521011021015155975351564810110157485656102559799101501011029910010157")) 18 | 19 | # fn main(): 20 | # var bytes = 1024 * 1024 * 256 + 78 21 | # var bytes_to_hash: 
DynamicVector[UInt8] = kinda_random_bytes(bytes) 22 | # var ptr = DTypePointer[DType.uint8](bytes_to_hash.data.value) 23 | # var buffer = Buffer[DType.uint8](ptr, bytes_to_hash.size) 24 | # var before = time.now() 25 | # var hash = sha256_encode(ptr, bytes) 26 | # var after = time.now() 27 | # var keep_vector_alive = bytes_to_hash[4] 28 | # var ns = after - before 29 | # var seconds = ns / 1_000_000_000 30 | # var megabytes = bytes / 1_000_000 31 | # for i in range(hash.size): 32 | # print(hash[i]) 33 | # print("megabytes per second") 34 | # print(megabytes / seconds) 35 | # var text = "hello world" 36 | # print(text) 37 | # print_hex(sha256_encode(text.data().bitcast[DType.uint8](), len(text))) 38 | 39 | 40 | # fn kinda_random_bytes(length: Int) -> DynamicVector[UInt8]: 41 | # var vec = DynamicVector[UInt8](capacity=length) 42 | # var n: UInt8 = 245 43 | # var cycle: UInt8 = 1 44 | # for i in range(length): 45 | # var shifted = n >> 3 46 | # var shiftalso = n << 4 47 | # var more = shifted ^ n ^ shiftalso 48 | # var next = n + more 49 | # n = next 50 | # cycle ^= n 51 | # vec.append(n + cycle) 52 | 53 | # return vec -------------------------------------------------------------------------------- /benchmark_other_languages/js/hash_functions/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "hash_functions", 3 | "lockfileVersion": 2, 4 | "requires": true, 5 | "packages": { 6 | "": { 7 | "dependencies": { 8 | "wyhash": "1.0.0", 9 | "xxhashjs": "0.2.2" 10 | } 11 | }, 12 | "node_modules/cuint": { 13 | "version": "0.2.2", 14 | "resolved": "https://registry.npmjs.org/cuint/-/cuint-0.2.2.tgz", 15 | "integrity": "sha512-d4ZVpCW31eWwCMe1YT3ur7mUDnTXbgwyzaL320DrcRT45rfjYxkt5QWLrmOJ+/UEAI2+fQgKe/fCjR8l4TpRgw==" 16 | }, 17 | "node_modules/wyhash": { 18 | "version": "1.0.0", 19 | "resolved": "https://registry.npmjs.org/wyhash/-/wyhash-1.0.0.tgz", 20 | "integrity": 
"sha512-3mxXnm7JQTAkxyWcq+POKqUq1cU+Wd9jyhRdAHz2xGuwL1cGjK/xhr73c+/JljnKYaZmmyq6v0Vv3l6t64w8ZQ==" 21 | }, 22 | "node_modules/xxhashjs": { 23 | "version": "0.2.2", 24 | "resolved": "https://registry.npmjs.org/xxhashjs/-/xxhashjs-0.2.2.tgz", 25 | "integrity": "sha512-AkTuIuVTET12tpsVIQo+ZU6f/qDmKuRUcjaqR+OIvm+aCBsZ95i7UVY5WJ9TMsSaZ0DA2WxoZ4acu0sPH+OKAw==", 26 | "dependencies": { 27 | "cuint": "^0.2.2" 28 | } 29 | } 30 | }, 31 | "dependencies": { 32 | "cuint": { 33 | "version": "0.2.2", 34 | "resolved": "https://registry.npmjs.org/cuint/-/cuint-0.2.2.tgz", 35 | "integrity": "sha512-d4ZVpCW31eWwCMe1YT3ur7mUDnTXbgwyzaL320DrcRT45rfjYxkt5QWLrmOJ+/UEAI2+fQgKe/fCjR8l4TpRgw==" 36 | }, 37 | "wyhash": { 38 | "version": "1.0.0", 39 | "resolved": "https://registry.npmjs.org/wyhash/-/wyhash-1.0.0.tgz", 40 | "integrity": "sha512-3mxXnm7JQTAkxyWcq+POKqUq1cU+Wd9jyhRdAHz2xGuwL1cGjK/xhr73c+/JljnKYaZmmyq6v0Vv3l6t64w8ZQ==" 41 | }, 42 | "xxhashjs": { 43 | "version": "0.2.2", 44 | "resolved": "https://registry.npmjs.org/xxhashjs/-/xxhashjs-0.2.2.tgz", 45 | "integrity": "sha512-AkTuIuVTET12tpsVIQo+ZU6f/qDmKuRUcjaqR+OIvm+aCBsZ95i7UVY5WJ9TMsSaZ0DA2WxoZ4acu0sPH+OKAw==", 46 | "requires": { 47 | "cuint": "^0.2.2" 48 | } 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /test_md5.mojo: -------------------------------------------------------------------------------- 1 | from md5 import md5_string 2 | from testing import assert_equal 3 | from wyhasher import wyhash 4 | from wyhasher.wyhasher import wymum 5 | 6 | alias alphabete: String = "0123456789abcdef" 7 | 8 | fn to_hex(v: SIMD[DType.uint8, 16]) -> String: 9 | var result: String = "" 10 | for i in range(16): 11 | var h = v[i] >> 4 12 | var l = v[i] & 15 13 | result += alphabete[int(h)] 14 | result += alphabete[int(l)] 15 | return result 16 | 17 | fn main() raises: 18 | var a: String = "Hello 🔥" 19 | assert_equal(to_hex(md5_string(a)), "b9735ea236e0d3103a39ad102a2e990f") 20 | _ = a 21 | 
var b: String = '米くを舵4物委らご氏松ハナテフ月関ソ時平ふいの博情れじフ牟万い元56園フメヤオ試図ロツヤ未備王こと傷喫羅踊んゆし。栃ユヱオ書著作ユソツロ英祉業ア大課ご権質フべ空8午キ切軟づン著郎そゃす格町採ヱオマコ処8付国ムハチア究表でなだ際無ロミヱ地兵ぴげ庭体すク発抜爆位や。楽富むゆず盛航カナセ携代ハ本高きた員59今骸ンラえぜ城解イケ穴訴ぽぎ属住ヤケトヌ抱点ト広注厚でて。 国リ出難セユメ軍手ヘカウ画形サヲシ猛85用ヲキミ心死よしと身処ケヨミオ教主ーぽ事業んく字国たさょ図能シミスヤ社8板ル岡世58次戒知院んれり。市メ誘根カ数問禁竹ゃれえみ給辺のでみき今二ぎさ裕止過こクすと無32郎所ラた生展ヌヘス成度慣葬勇厘ばてか。室ゃ下携疲ム色権がぽりっ銃週ノオ姫千テム健蔵い研手ッ放容ル告属め旅側26企サノヨ宅都福ぞ通待ちぴね種脳イど労希望義通むン。 罰しい続負せ著低たル異師ユハワ東添質コチ転集ルヤ雇聴約ヒ前統らた情厳ゆさでや真胸や有披暑棚豆ゆぼたけ。盛ワセロナ情競クるっわ講3音ずをせ少地めしぜょ手63明視れに判企ヒヌエソ求総58特本ね井比ユラキ禁頭馬るゅリす能率率かがさわ。葉サソ医郡ヱヘソ労帰ナケスミ救写ワヘ株審ネヒニミ安逮イ人画ラ涯車はラ極騒りなド件5級ンかふー劇41著ぱぐ凱討だ文世ぶづどま界善魅マ渓経競融れがや。 連ーぜらご模分ッ視外ばフく運発群ほぼづ育越一ほごクけ案募ヲイソ治会イせフ製君ぜた漢村1変リヒ構5際ツ御文ヲ臭入さドぼ代書ハケ引技ろみれ回観注倉徹ぱ。論ラづ海要サ情座ゃり齢宣ラモエ芸化エマホ覧催回ら戦69本外ト葬岳な政画か連針ぴリフず。約ル闘辺ぽ経2応掲ホサアラ塾小コラ画決クノオ上室レヌヱ勝逮ぜるえむ責豊チノ明意ひけ訟6碁草メタチエ財午召喝塊む。 決めでわ名金つけレわ続人県約ぽぼす尾腹ユサ戦載リシ護賀レモフツ重涯ニ治者むんっみ職更カタチレ提話2何ワ責東まけげふ能政ヌ供禁がびてわ提改倶れめ。読み担後ぽ安加ぎ論鹿ツ統最お気麻月つじもあ竜思いろめ判必満理トコ文連ムイウハ寄串ざほびー。文ゆこっ向27年メイ便能ノセヲ待1王スねたゆ伝派んね点過カト治読よにきべ使人スシ都言え阻8割べづえみ注引敷的岳犠眠どそ。 学用イだ医客開ロ供界もぞだ実隆モイヌ務坂ナコヲ権野ろづ初場ぱ低会づぱじ新倒コ化政レ止奮浸猪ッわえづ。形いやリ要帰ほまむだ業領スル必打さ島14巻リ集日ネヘホタ面幅ち写上そぴ円図ムタコモ報使イわざと会催ヤヲ康証をドぶレ盤岡ホハツ作29管しをめ公問懐蓄っさ。来ゆぼあぱ投秋シ語右ぐ身靖かば辛握捕家記ヘワ神岐囲づ毘観メテクツ政73夕罪57需93誌飲査仁さ。 変レめ束球よんま会特ヱコ聞重だ史純ーどる件32浦レぴよゃ上強ネラリロ査従セユヤ専棋光レ作表ひぶ予正ぜーな誉確フス函6報円ス進治ね能営済否雄でわょ。42生型ば着続ア短実ぎおめび前環闘ラヤヲル診均っとにの声公トヱテマ整試椅情久妊舌頃ざとっく。品キチトテ阿国ラら受87世ヲフセリ川86個ーょぼげ危子ヘレカメ無会ぱかへ事通んかて電条ロツ徴商ぶぞそを居暑メ害広せもがり禁応レミヲ応響割壮憶はぱ。 千れンが織財メニ況界ネトレミ学豊フオホシ近月レたやご的罪ょな菱技ちる警栗エセ提89林危氷48参ア説森クキヒヱ薬社ホコエリ負和ルび紀下ケミイ掲歳特ごず扱底ク護木連ちクを各形ばすか。変ぱなれ町7融ヌ街準以タユヘム質裕ぶで遺語俊ぎずょ事金文キ写多山ーゆに歩帯すで会世クぜよ論写ヲ達71林危氷5間続ぎぜび高怠す。 係8青け応著ミ戦条ナヘネカ思79未ぎ算伊をゃ泉人ーづ需説っ畑鹿27軽ラソツ権2促千護ルロナカ開国ケ暴嶋ご池表だ。佐フナ訪麻はてせば勝効をあ医戦画とさわぴ者両すいあ並来んば載食ぴ件友頂業へえぞ魚祝ネラ聞率スコリケ始全ンこび夫出ドふ今布うぎふゅ実克即哉循やしんな。 暮す備54依紀てッん末刊と柔称むてス無府ケイ変壌をぱ汁連フマス海世ヌ中負知問ナヘケ純推ひ読着ヒ言若私軽れ。掲けフむ王本オコ線人をっさ必和断セソヲハ図芸ちかな防長りぶは投新意相ツ並5余セ職岳ぞ端古空援そ。森ヨエチ題5東っ自兄ち暴5近鹿横ト的京ハ安氷ナキ深際ぎ並節くスむの権工ほルせ京49効タムチ処三ぞぴラ済国ずっ文経ヘトミ水分準そが。' 22 | assert_equal(to_hex(md5_string(b)), "168f7f85febeb19dbad38502499ea1d0") 23 | _ = b 24 | -------------------------------------------------------------------------------- /benchmark_other_languages/c/hash_functions/xxh3.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * xxHash - Extremely Fast Hash algorithm 3 | * Development source file for `xxh3` 4 | * Copyright (C) 2019-2021 Yann Collet 5 | * 6 | * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are 10 | * met: 11 | * 12 | * * Redistributions of source code must retain the above copyright 13 | * notice, this list of conditions and the following disclaimer. 14 | * * Redistributions in binary form must reproduce the above 15 | * copyright notice, this list of conditions and the following disclaimer 16 | * in the documentation and/or other materials provided with the 17 | * distribution. 18 | * 19 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | * You can contact the author at: 32 | * - xxHash homepage: https://www.xxhash.com 33 | * - xxHash source repository: https://github.com/Cyan4973/xxHash 34 | */ 35 | 36 | /* 37 | * Note: This file used to host the source code of XXH3_* variants. 38 | * during the development period. 
fn main() raises:
    """Hash the system word list with every hash function and print timings.

    First each function is timed once with `now()`, printing the elapsed
    time, the digest and the input length. Then MD5, the standard `hash`,
    Ahash and Wyhash are run through `benchmark.run` and their reports are
    printed in nanoseconds.

    Raises:
        If the word list cannot be read.
    """
    var text = Path("/usr/share/dict/words").read_text()
    var tik = now()
    var h0 = md5_string(text)
    var tok = now()
    print("MD5 :", tok - tik, to_hex(h0), len(text))

    tik = now()
    # Hash the whole text; a length of 0 would time and print the digest of
    # an empty input while reporting len(text) next to it.
    var h5 = sha256_encode(text.unsafe_ptr(), len(text))
    tok = now()
    print("SHA256 :", tok - tik, to_hex(h5), len(text))

    tik = now()
    var h1 = wyhash(text, 0)
    tok = now()
    print("Wyhash :", tok - tik, h1, len(text))

    tik = now()
    var h2 = ahash(text)
    tok = now()
    print("Ahash :", tok - tik, h2, len(text))

    tik = now()
    var h3 = fxhash64(text)
    tok = now()
    print("Fxhash :", tok - tik, h3, len(text))

    tik = now()
    var h4 = hash(text.unsafe_ptr(), len(text))
    tok = now()
    print("Std hash:", tok - tik, h4, len(text))

    # Results are written to variables captured by the benchmarked closures
    # and printed afterwards.
    var hb = SIMD[DType.uint8, 16]()

    @parameter
    fn md5_test():
        hb = md5_string(text)
    print("===MD5===")
    var report0 = benchmark.run[md5_test]()
    report0.print(Unit.ns)
    print(hb)

    var hi = 0

    @parameter
    fn hash_test():
        hi = hash(text.unsafe_ptr(), len(text))

    print("===Std hash===")
    var report1 = benchmark.run[hash_test]()
    report1.print(Unit.ns)
    print(hi)

    var hu = UInt64(0)

    @parameter
    fn ahash_test():
        hu = ahash(text)

    print("===Ahash===")
    var report2 = benchmark.run[ahash_test]()
    report2.print(Unit.ns)
    print(hu)

    @parameter
    fn wyhash_test():
        hu = wyhash(text, 0)

    print("===Wyhash===")
    var report3 = benchmark.run[wyhash_test]()
    report3.print(Unit.ns)
    print(hu)

    # Keep `text` alive until here: raw pointers into it were used above.
    _ = text
@always_inline
fn wymum_32(inout a: UInt64, inout b: UInt64):
    """Mix `a` and `b` in place using only 32x32-bit partial products.

    Both inputs are split into 32-bit halves. `a` receives the cross product
    high(a) * low(b), rotated right by 32 bits, XORed with high(a) * high(b);
    `b` receives low(a) * high(b), rotated right by 32 bits, XORed with
    low(a) * low(b).
    """
    var pair = U128(a, b)
    var low_halves = pair & 0xff_ff_ff_ff
    var high_halves = pair >> 32
    var high_high = high_halves.reduce_mul()
    var high_low = high_halves[0] * low_halves[1]
    var low_low = low_halves.reduce_mul()
    var low_high = low_halves[0] * high_halves[1]
    var mixed_a = rotate_bits_right[32](high_low) ^ high_high
    var mixed_b = rotate_bits_right[32](low_high) ^ low_low
    a = mixed_a
    b = mixed_b
| var ll = abl.reduce_mul() 28 | var lh = abl[0] * abh[1] 29 | var t = ll + (hl << 32) 30 | var lo = t + (lh << 32) 31 | var c = (t < ll).cast[DType.uint64]() 32 | c += (lo < t).cast[DType.uint64]() 33 | var hi = hh + (hl >> 32) + (lh >> 32) + c 34 | a, b = lo, hi 35 | 36 | @always_inline 37 | fn wy_mix(_a: UInt64, _b: UInt64) -> UInt64: 38 | var a = _a 39 | var b = _b 40 | wymum(a, b) 41 | return a ^ b 42 | 43 | @always_inline 44 | fn wyr8(p: UnsafePointer[UInt8]) -> UInt64: 45 | return p.bitcast[DType.uint64]().load() 46 | 47 | @always_inline 48 | fn wyr4(p: UnsafePointer[UInt8]) -> UInt64: 49 | return p.bitcast[DType.uint32]().load().cast[DType.uint64]() 50 | 51 | @always_inline 52 | fn wyr3(p: UnsafePointer[UInt8], k: Int) -> UInt64: 53 | return (p.load().cast[DType.uint64]() << 16) 54 | | (p.offset(k >> 1).load().cast[DType.uint64]() << 8) 55 | | p.offset(k - 1).load().cast[DType.uint64]() 56 | 57 | fn wyhash(key: String, _seed: UInt64, secret: U256 = default_secret) -> UInt64: 58 | var length = len(key) 59 | var p = UnsafePointer(key.unsafe_ptr()) 60 | var seed = _seed ^ wy_mix(_seed ^ secret[0], secret[1]) 61 | var a: UInt64 = 0 62 | var b: UInt64 = 0 63 | if length <= 16: 64 | if length >= 4: 65 | var last_part_index = (length >> 3) << 2 66 | a = (wyr4(p) << 32) | wyr4(p.offset(last_part_index)) 67 | b = (wyr4(p.offset(length - 4)) << 32) | wyr4(p.offset(length - 4 - last_part_index)) 68 | elif length > 0: 69 | a = wyr3(p, length) 70 | else: 71 | var see1 = seed 72 | var see2 = seed 73 | 74 | while length >= 48: 75 | seed = wy_mix(wyr8(p) ^ secret[1], wyr8(p + 8) ^ seed) 76 | see1 = wy_mix(wyr8(p + 16) ^ secret[2], wyr8(p + 24) ^ see1) 77 | see2 = wy_mix(wyr8(p + 32) ^ secret[3], wyr8(p + 40) ^ see2) 78 | p = p.offset(48) 79 | length -= 48 80 | 81 | seed ^= see1 ^ see2 82 | 83 | while length > 16: 84 | var p64 = p.bitcast[DType.uint64]() 85 | var data = p64.load[width=2]() 86 | var seed_values = U128(secret[1], seed) 87 | var seeded_data = data ^ 
seed_values 88 | seed = wy_mix(seeded_data[0], seeded_data[1]) 89 | p = p.offset(16) 90 | length -= 16 91 | a = wyr8(p.offset(length-16)) 92 | b = wyr8(p.offset(length-8)) 93 | 94 | a ^= secret[1] 95 | b ^= seed 96 | wymum(a, b) 97 | 98 | return wy_mix(a ^ secret[0] ^ len(key), b ^ secret[1]) 99 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
from bit import byte_swap
from bit import rotate_bits_left

alias U256 = SIMD[DType.uint64, 4]
alias U128 = SIMD[DType.uint64, 2]
alias MULTIPLE = 6364136223846793005
alias ROT = 23


@always_inline
fn folded_multiply(s: UInt64, by: UInt64) -> UInt64:
    """Mixes `s` with `by` using two 64-bit multiplications and byte swaps.

    Returns `(s * bswap(by)) ^ bswap(bswap(s) * ~by)`.
    """
    var b1 = s * byte_swap(by)
    var b2 = byte_swap(s) * (~by)
    return b1 ^ byte_swap(b2)


@always_inline
fn read_small(data: UnsafePointer[UInt8], length: Int) -> U128:
    """Packs an input of at most 8 bytes into two 64-bit lanes.

    - length >= 4: lane 0 is the first 4 bytes, lane 1 the last 4 bytes
      (the two reads overlap when length < 8).
    - length 2..3: lane 0 is the first 2 bytes, lane 1 the last byte.
    - length 1: both lanes hold the single byte.
    - length 0: both lanes are zero.
    """
    if length >= 2:
        if length >= 4:
            # len 4-8
            var a = data.bitcast[DType.uint32]().load().cast[DType.uint64]()
            var b = data.offset(length - 4).bitcast[DType.uint32]().load().cast[DType.uint64]()
            return U128(a, b)
        else:
            var a = data.bitcast[DType.uint16]().load().cast[DType.uint64]()
            var b = data.offset(length - 1).load().cast[DType.uint64]()
            return U128(a, b)
    else:
        if length > 0:
            var a = data.load().cast[DType.uint64]()
            return U128(a, a)
        else:
            return U128(0, 0)


struct AHasher:
    """Streaming hasher state: a running buffer, a pad and two extra keys."""

    var buffer: UInt64
    var pad: UInt64
    var extra_keys: U128

    fn __init__(inout self, key: U256):
        """Derives the initial state by XOR-ing `key` with four fixed constants.

        Lane 0 seeds `buffer`, lane 1 seeds `pad`, lanes 2 and 3 become `extra_keys`.
        """
        var pi_key = key ^ U256(0x243f_6a88_85a3_08d3, 0x1319_8a2e_0370_7344, 0xa409_3822_299f_31d0, 0x082e_fa98_ec4e_6c89,)
        self.buffer = pi_key[0]
        self.pad = pi_key[1]
        self.extra_keys = U128(pi_key[2], pi_key[3])

    @always_inline
    fn update(inout self, new_data: UInt64):
        """Folds one 64-bit word into `buffer`."""
        self.buffer = folded_multiply(new_data ^ self.buffer, MULTIPLE)

    @always_inline
    fn large_update(inout self, new_data: U128):
        """Folds two 64-bit words into `buffer`, keyed by `extra_keys` and `pad`."""
        var combined = folded_multiply(
            new_data[0] ^ self.extra_keys[0], new_data[1] ^ self.extra_keys[1]
        )
        self.buffer = rotate_bits_left[ROT]((self.buffer + self.pad) ^ combined)

    @always_inline
    fn short_finish(self) -> UInt64:
        """Returns `buffer + pad` without the final multiply/rotate."""
        return self.buffer + self.pad

    @always_inline
    fn finish(self) -> UInt64:
        """Returns `folded_multiply(buffer, pad)` rotated left by `buffer & 63` bits."""
        var rot = self.buffer & 63
        var folded = folded_multiply(self.buffer, self.pad)
        # The right-shift count is reduced modulo 64: without the mask a
        # rotation amount of 0 would shift by the full 64-bit width, which is
        # not a well-defined shift. For rot in 1..63 the mask changes nothing.
        return (folded << rot) | (folded >> ((64 - rot) & 63))

    @always_inline
    fn write(inout self, data: UnsafePointer[UInt8], length: Int):
        """Absorbs `length` bytes starting at `data` into the state.

        The length is mixed in first. Inputs longer than 16 bytes absorb the
        final 16 bytes and then the input in 16-byte steps; 9..16 bytes use the
        first and last 8 bytes; shorter inputs go through `read_small`.
        """
        self.buffer = (self.buffer + length) * MULTIPLE
        if length > 8:
            if length > 16:
                var tail = data.offset(length - 16).bitcast[DType.uint64]().load[width=2]()
                self.large_update(tail)
                var offset = 0
                # Stops while more than 16 bytes remain unread from `offset`;
                # the end of the input was already covered by `tail`.
                while length - offset > 16:
                    var block = data.offset(offset).bitcast[DType.uint64]().load[width=2]()
                    self.large_update(block)
                    offset += 16
            else:
                var a = data.bitcast[DType.uint64]().load()
                var b = data.offset(length - 8).bitcast[DType.uint64]().load()
                self.large_update(U128(a, b))
        else:
            var value = read_small(data, length)
            self.large_update(value)


@always_inline
fn ahash(s: String) -> UInt64:
    """Hashes the bytes of `s` with an all-zero key and returns the 64-bit result.

    Strings longer than 8 bytes go through `AHasher.write`; shorter ones are
    packed with `read_small` and mixed directly, with the length added to `pad`.
    """
    var length = len(s)
    var b = s.unsafe_ptr()
    var hasher = AHasher(U256(0, 0, 0, 0))

    if length > 8:
        hasher.write(b, length)
    else:
        var value = read_small(b, length)
        hasher.buffer = folded_multiply(value[0] ^ hasher.buffer, value[1] ^ hasher.extra_keys[1])
        hasher.pad = hasher.pad + length

    return hasher.finish()
# Based on https://github.com/Zunawe/md5-c

from utils.loop import unroll
from memory.unsafe import bitcast
from memory import memset_zero
from bit import rotate_bits_left

# Per-round left-rotation amounts, indexed by round number 0..63.
alias S = SIMD[DType.uint32, 64](
    7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
    5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,
    4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
    6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21
)

# Per-round additive constants, indexed by round number 0..63.
alias K = SIMD[DType.uint32, 64](
    0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
    0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
    0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
    0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
    0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
    0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
    0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
    0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
    0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
    0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
    0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
    0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
    0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
    0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
    0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
    0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
)

# 64-byte padding block: 0x80 followed by zeros.
alias PADDING = create_padding()


fn create_padding() -> UnsafePointer[UInt8]:
    """Allocates the 64-byte padding block: a single 0x80 byte followed by 63 zero bytes."""
    var block = UnsafePointer[UInt8].alloc(64)
    for i in range(64):
        block.store(i, 0)
    block.store(0, 0x80)
    return block


struct Md5Context:
    """Running MD5 state: four 32-bit chaining words, a 64-byte block buffer and a byte count."""

    var buffer: SIMD[DType.uint32, 4]
    var input: SIMD[DType.uint8, 64]
    var digest: SIMD[DType.uint8, 16]
    var size: UInt64

    fn __init__(inout self):
        """Starts an empty message with the fixed initial chaining values."""
        self.buffer = SIMD[DType.uint32, 4](0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476)
        self.input = SIMD[DType.uint8, 64]()
        self.digest = SIMD[DType.uint8, 16]()
        self.size = 0

    @always_inline
    fn update(inout self, input_buffer: UnsafePointer[UInt8], length: Int):
        """Appends `length` bytes from `input_buffer`, compressing each completed 64-byte block."""
        var cursor = int(self.size & 63)
        self.size += length

        for i in range(length):
            self.input[cursor] = input_buffer.offset(i).load()
            cursor += 1
            if cursor == 64:
                # TODO: check if it works on BigEndian arch (or needs bswap?)
                self.step(bitcast[DType.uint32, 16](self.input))
                cursor = 0

    @always_inline
    fn finalize(owned self) -> SIMD[DType.uint8, 16]:
        """Pads the message, appends its bit length and returns the 16 digest bytes."""
        var used = int(self.size & 63)
        var padding_length: Int
        if used < 56:
            padding_length = 56 - used
        else:
            padding_length = 56 + 64 - used

        self.update(PADDING, padding_length)
        # `update` counted the padding as message bytes; undo that so the
        # length field below reflects only the caller's data.
        self.size -= padding_length

        var bit_count = self.size * 8
        var last_block = bitcast[DType.uint32, 16](self.input)
        last_block[14] = bit_count.cast[DType.uint32]()
        last_block[15] = (bit_count >> 32).cast[DType.uint32]()
        self.step(last_block)
        return bitcast[DType.uint8, 16](self.buffer)

    @always_inline
    fn step(inout self, input: SIMD[DType.uint32, 16]):
        """Runs the 64 unrolled rounds over one 16-word block and adds the result into `buffer`."""
        var a = self.buffer[0]
        var b = self.buffer[1]
        var c = self.buffer[2]
        var d = self.buffer[3]

        @parameter
        fn round[i: Int]():
            alias group = i >> 4
            var mixed: UInt32 = 0
            var word_index = 0

            @parameter
            if group == 0:
                mixed = (b & c) | (~b & d)
                word_index = i
            elif group == 1:
                mixed = (b & d) | (c & ~d)
                word_index = (i * 5 + 1) & 15
            elif group == 2:
                mixed = b ^ c ^ d
                word_index = (i * 3 + 5) & 15
            else:
                mixed = c ^ (b | ~d)
                word_index = (i * 7) & 15

            var next_b = b + rotate_bits_left[int(S[i])](a + mixed + K[i] + input[word_index])
            a = d
            d = c
            c = b
            b = next_b

        unroll[round, 64]()

        self.buffer += SIMD[DType.uint32, 4](a, b, c, d)


@always_inline
fn md5_string(value: String) -> SIMD[DType.uint8, 16]:
    """Returns the 16-byte MD5 digest of the `len(value)` bytes of `value`."""
    var context = Md5Context()
    context.update(value.unsafe_ptr(), len(value))
    return context^.finalize()
version = "0.1.0" 53 | dependencies = [ 54 | "ahash", 55 | "fxhash", 56 | "md5", 57 | "rand", 58 | "wyhash2", 59 | ] 60 | 61 | [[package]] 62 | name = "libc" 63 | version = "0.2.151" 64 | source = "registry+https://github.com/rust-lang/crates.io-index" 65 | checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" 66 | 67 | [[package]] 68 | name = "md5" 69 | version = "0.7.0" 70 | source = "registry+https://github.com/rust-lang/crates.io-index" 71 | checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" 72 | 73 | [[package]] 74 | name = "no-std-compat" 75 | version = "0.4.1" 76 | source = "registry+https://github.com/rust-lang/crates.io-index" 77 | checksum = "b93853da6d84c2e3c7d730d6473e8817692dd89be387eb01b94d7f108ecb5b8c" 78 | 79 | [[package]] 80 | name = "once_cell" 81 | version = "1.19.0" 82 | source = "registry+https://github.com/rust-lang/crates.io-index" 83 | checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" 84 | 85 | [[package]] 86 | name = "ppv-lite86" 87 | version = "0.2.17" 88 | source = "registry+https://github.com/rust-lang/crates.io-index" 89 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" 90 | 91 | [[package]] 92 | name = "proc-macro2" 93 | version = "1.0.71" 94 | source = "registry+https://github.com/rust-lang/crates.io-index" 95 | checksum = "75cb1540fadbd5b8fbccc4dddad2734eba435053f725621c070711a14bb5f4b8" 96 | dependencies = [ 97 | "unicode-ident", 98 | ] 99 | 100 | [[package]] 101 | name = "quote" 102 | version = "1.0.33" 103 | source = "registry+https://github.com/rust-lang/crates.io-index" 104 | checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" 105 | dependencies = [ 106 | "proc-macro2", 107 | ] 108 | 109 | [[package]] 110 | name = "rand" 111 | version = "0.8.5" 112 | source = "registry+https://github.com/rust-lang/crates.io-index" 113 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 
114 | dependencies = [ 115 | "libc", 116 | "rand_chacha", 117 | "rand_core", 118 | ] 119 | 120 | [[package]] 121 | name = "rand_chacha" 122 | version = "0.3.1" 123 | source = "registry+https://github.com/rust-lang/crates.io-index" 124 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 125 | dependencies = [ 126 | "ppv-lite86", 127 | "rand_core", 128 | ] 129 | 130 | [[package]] 131 | name = "rand_core" 132 | version = "0.6.4" 133 | source = "registry+https://github.com/rust-lang/crates.io-index" 134 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 135 | dependencies = [ 136 | "getrandom", 137 | ] 138 | 139 | [[package]] 140 | name = "syn" 141 | version = "2.0.42" 142 | source = "registry+https://github.com/rust-lang/crates.io-index" 143 | checksum = "5b7d0a2c048d661a1a59fcd7355baa232f7ed34e0ee4df2eef3c1c1c0d3852d8" 144 | dependencies = [ 145 | "proc-macro2", 146 | "quote", 147 | "unicode-ident", 148 | ] 149 | 150 | [[package]] 151 | name = "unicode-ident" 152 | version = "1.0.12" 153 | source = "registry+https://github.com/rust-lang/crates.io-index" 154 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 155 | 156 | [[package]] 157 | name = "version_check" 158 | version = "0.9.4" 159 | source = "registry+https://github.com/rust-lang/crates.io-index" 160 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 161 | 162 | [[package]] 163 | name = "wasi" 164 | version = "0.11.0+wasi-snapshot-preview1" 165 | source = "registry+https://github.com/rust-lang/crates.io-index" 166 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 167 | 168 | [[package]] 169 | name = "wyhash2" 170 | version = "0.2.1" 171 | source = "registry+https://github.com/rust-lang/crates.io-index" 172 | checksum = "9433c7c86e328a8197038c9fc31f6e5c81f9c7bdc087d86e266680236af1af1b" 173 | dependencies = [ 174 | "no-std-compat", 175 | ] 176 | 177 | [[package]] 178 | 
from collections import Set
from time import now
from memory.unsafe import bitcast
# from fiby_tree import FibyTree
from my_utils import int_cmp64, int_to_str64, cmp_str, stsl, int_cmp, int_to_str, corpus1, corpus2, corpus3, corpus4, corpus5, corpus6, corpus7, corpus8
from ahasher import ahash
from wyhasher import wyhash
from fnv1a import fnv1a64, fnv1a32
from fxhash import fxhash64, fxhash32
from md5 import md5_string
# from rapidhash import rapid_hash
from o1hash import o1_hash


@always_inline
fn std_hash64(s: String) -> UInt64:
    """Adapter exposing the built-in `hash` with the benchmark's `fn(String) -> UInt64` shape."""
    return hash(s)


@always_inline
fn md5_hash(s: String) -> UInt64:
    """Adapter returning the first 8 bytes of the MD5 digest of `s` as one UInt64."""
    return bitcast[DType.uint64, 2](md5_string(s))[0]


fn benchamark[hashfn: fn(String) -> UInt64, steps: Int = 20](corpus: List[String], name: StringLiteral):
    """Times a 64-bit hash function over `corpus` and prints timing and collision ratios.

    Parameters:
        hashfn: The hash function under test.
        steps: How many times the whole corpus is hashed.

    Args:
        corpus: The keys to hash.
        name: Label printed in front of the results.

    Prints the average `now()` delta per hash call (capped at the initial
    100000.0 by the `min`), the ratio of unique keys to distinct hash values,
    and the same ratio for hashes reduced to their low 9 bits.
    """
    # Distinct values are tracked in plain lists with a linear search
    # (the FibyTree based variant is disabled together with its import).
    var fs = Set[String]()
    var min_avg: Float64 = 100000.0
    var mod = (1 << 9)
    var hashes = List[UInt64]()
    var mod_hashes: List[UInt64] = List[UInt64]()
    var total = 0
    for step in range(steps):
        for i in range(len(corpus)):
            var key = corpus[i]
            var tik = now()
            var hash = hashfn(key)
            var tok = now()
            total += tok - tik

            var found = False
            for seen in range(len(hashes)):
                if hash == hashes[seen]:
                    found = True
                    break
            if not found:
                hashes.append(hash)

            found = False
            for seen in range(len(mod_hashes)):
                if hash & (mod - 1) == mod_hashes[seen]:
                    found = True
                    break
            if not found:
                mod_hashes.append(hash & (mod - 1))

            # Unique keys only need to be collected once.
            if step == 0:
                fs.add(key)
    var c_avg = (total / steps) / len(corpus)
    min_avg = min(min_avg, c_avg)
    print(
        name, "avg hash compute", min_avg, "| hash colision", len(fs) / len(hashes),
        "| hash colision mod", mod, len(fs) / len(mod_hashes)
    )


fn benchamark32[hashfn: fn(String) -> UInt32, steps: Int = 20](corpus: List[String], name: StringLiteral):
    """Same measurement as `benchamark`, for hash functions returning UInt32.

    Parameters:
        hashfn: The hash function under test.
        steps: How many times the whole corpus is hashed.

    Args:
        corpus: The keys to hash.
        name: Label printed in front of the results.
    """
    var fs = Set[String]()
    var min_avg: Float64 = 100000.0
    var mod = (1 << 9)
    var hashes: List[UInt32] = List[UInt32]()
    var mod_hashes: List[UInt32] = List[UInt32]()
    var total = 0
    for step in range(steps):
        for i in range(len(corpus)):
            var key = corpus[i]
            var tik = now()
            var hash = hashfn(key)
            var tok = now()
            total += tok - tik

            var found = False
            for seen in range(len(hashes)):
                if hash == hashes[seen]:
                    found = True
                    break
            if not found:
                hashes.append(hash)

            found = False
            for seen in range(len(mod_hashes)):
                if hash & (mod - 1) == mod_hashes[seen]:
                    found = True
                    break
            if not found:
                mod_hashes.append(hash & (mod - 1))

            # Unique keys only need to be collected once.
            if step == 0:
                fs.add(key)
    var c_avg = (total / steps) / len(corpus)
    min_avg = min(min_avg, c_avg)
    print(
        name, "avg hash compute", min_avg, "| hash colision", len(fs) / len(hashes),
        "| hash colision mod", mod, len(fs) / len(mod_hashes)
    )


fn corpus_details(corpus: List[String]):
    """Prints word count, unique word count and min/avg/max key size of `corpus`."""
    var word_count = len(corpus)
    var fs = Set[String]()
    var min_key_size = 10000000
    var max_key_size = 0
    var total_key_size = 0
    # Visit every key. The loop previously stopped at `word_count - 1`, so the
    # last key was missing from the unique/min/max/total figures even though
    # the average below divides by the full `word_count`.
    for i in range(word_count):
        var key = corpus[i]
        fs.add(key)
        var key_size = len(key)
        total_key_size += key_size
        min_key_size = min(min_key_size, key_size)
        max_key_size = max(max_key_size, key_size)

    print(
        "Word count", word_count, "| unique word count",
        len(fs),
        "| min key size", min_key_size, "| avg key size", total_key_size / word_count, "| max key size", max_key_size
    )


fn sample_wyhash(s : String) -> UInt64:
    """Adapter calling `wyhash` with seed 0 and an explicit four-word secret."""
    var default_secret = SIMD[DType.uint64, 4](0xa0761d6478bd642f, 0xe7037ed1a0b428db, 0x8ebc6af09c88c6e3, 0x589965cc75374cc3)
    return wyhash(s, 0, default_secret)

# fn sample_rapidhash(s : String) -> UInt64:
#     var hash = rapid_hash(s.unsafe_ptr(), len(s))
#     _ = s
#     return hash

fn sample_fxhash64(s : String) -> UInt64:
    """Adapter calling `fxhash64` with seed 0."""
    return fxhash64(s, 0)

fn sample_fxhash32(s : String) -> UInt32:
    """Adapter calling `fxhash32` with seed 0."""
    return fxhash32(s, 0)


fn _run_suite(corpus: List[String], title: StringLiteral):
    """Prints `title` and the corpus details, then runs every hash function with default steps."""
    print(title)
    corpus_details(corpus)
    benchamark[ahash](corpus, "AHash")
    benchamark[sample_wyhash](corpus, "Wyhash")
    # benchamark[sample_rapidhash](corpus, "Rapidhash")
    benchamark32[fnv1a32](corpus, "fnv1a32")
    benchamark[fnv1a64](corpus, "fnv1a64")
    benchamark32[sample_fxhash32](corpus, "fxHash32")
    benchamark[sample_fxhash64](corpus, "fxHash64")
    benchamark[std_hash64](corpus, "std_Hash64")
    benchamark[o1_hash](corpus, "o1Hash")
    benchamark[md5_hash](corpus, "MD5")


fn main() raises:
    var c1 = corpus1()
    _run_suite(c1, "\nCorpus 1")

    var c2 = corpus2()
    _run_suite(c2, "\nCorpus 2")

    # Corpora 3 to 6 are currently disabled. To enable one, add e.g.:
    # var c3 = corpus3()
    # _run_suite(c3, "\nCorpus 3")

    var c7 = corpus7()
    _run_suite(c7, "\nCorpus 7")

    # Corpus 8 runs with its own step counts and without MD5, so it is
    # spelled out instead of going through `_run_suite`.
    var c8 = corpus8()
    print("\nCorpus 8")
    corpus_details(c8)
    benchamark[ahash, 3](c8, "AHash")
    benchamark[sample_wyhash, 3](c8, "Wyhash")
    # benchamark[sample_rapidhash](c8, "Rapidhash")
    benchamark32[fnv1a32, 3](c8, "fnv1a32")
    benchamark[fnv1a64, 3](c8, "fnv1a64")
    benchamark32[sample_fxhash32, 3](c8, "fxHash32")
    benchamark[sample_fxhash64, 3](c8, "fxHash64")
    benchamark[std_hash64, 3](c8, "std_Hash64")
    benchamark[o1_hash](c8, "o1Hash")
    # benchamark[md5_hash, 1](c8, "MD5")
from collections.vector import InlinedFixedVector
import time


@always_inline
fn big_endian_bytes_to_dword(
    first: UInt8, second: UInt8, third: UInt8, fourth: UInt8
) -> UInt32:
    """Assembles four bytes into one UInt32, `first` being the most significant byte."""
    var a = first.cast[DType.uint32]() << 24
    var b = second.cast[DType.uint32]() << 16
    var c = third.cast[DType.uint32]() << 8
    var d = fourth.cast[DType.uint32]() << 0
    return a | b | c | d


@always_inline
fn big_endian_dword_to_bytes(word: UInt32) -> InlinedFixedVector[UInt8, 4]:
    """Splits `word` into its four bytes, most significant byte first."""
    var v = InlinedFixedVector[UInt8, 4](4)
    var a = (word >> 24) & 255
    var b = (word >> 16) & 255
    var c = (word >> 8) & 255
    var d = word & 255
    v.append(a.cast[DType.uint8]())
    v.append(b.cast[DType.uint8]())
    v.append(c.cast[DType.uint8]())
    v.append(d.cast[DType.uint8]())
    return v


@always_inline
fn big_endian_qword_to_bytes(word: UInt64) -> InlinedFixedVector[UInt8, 8]:
    """Splits `word` into its eight bytes, most significant byte first."""
    var v = InlinedFixedVector[UInt8, 8](8)
    var a = (word >> 56) & 255
    var b = (word >> 48) & 255
    var c = (word >> 40) & 255
    var d = (word >> 32) & 255
    var e = (word >> 24) & 255
    var f = (word >> 16) & 255
    var g = (word >> 8) & 255
    var h = word & 255
    v.append(a.cast[DType.uint8]())
    v.append(b.cast[DType.uint8]())
    v.append(c.cast[DType.uint8]())
    v.append(d.cast[DType.uint8]())
    v.append(e.cast[DType.uint8]())
    v.append(f.cast[DType.uint8]())
    v.append(g.cast[DType.uint8]())
    v.append(h.cast[DType.uint8]())
    return v


# bit rotate right
@always_inline
fn bitrr(integer: UInt32, rotations: UInt32) -> UInt32:
    """Rotates `integer` right by `rotations` bits (taken modulo 32).

    Both shift counts are reduced modulo 32. Without that, a rotation of 0
    would left-shift by the full 32-bit width, which is not a well-defined
    shift; for rotations 1..31 the result is unchanged.
    """
    var amount = rotations & 31
    return (integer >> amount) | (integer << ((32 - amount) & 31))
| 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA, 62 | 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967, 63 | 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85, 64 | 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070, 65 | 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3, 66 | 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2 67 | ) 68 | 69 | alias h = SIMD[DType.uint32, 8]( 70 | 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19, 71 | ) 72 | 73 | # for reference see https://en.wikipedia.org/wiki/SHA-2#Pseudocode 74 | # right now it internally copies the byte_view into a dynamic vector and works on that 75 | # this is slow, but i don't have the mojo mojo to chunk it out for zero-copy 76 | fn sha256_encode(byte_view: UnsafePointer[UInt8], length: Int) -> InlinedFixedVector[UInt8, 32]: 77 | 78 | var h0: UInt32 = 0x6A09E667 79 | var h1: UInt32 = 0xBB67AE85 80 | var h2: UInt32 = 0x3C6EF372 81 | var h3: UInt32 = 0xA54FF53A 82 | var h4: UInt32 = 0x510E527F 83 | var h5: UInt32 = 0x9B05688C 84 | var h6: UInt32 = 0x1F83D9AB 85 | var h7: UInt32 = 0x5BE0CD19 86 | 87 | var one_bit: UInt8 = 0b1000_0000 88 | 89 | var exact_chunks = length // 64 90 | var remainder_start = exact_chunks * 64 91 | var remainder_length = length % 64 92 | var bare_min_extra_bytes = remainder_length + 9 93 | var extra_space = InlinedFixedVector[UInt8,128](128) 94 | for i in range(remainder_length): 95 | extra_space.append(byte_view[remainder_start + i]) 96 | 97 | extra_space.append(one_bit) 98 | var only_one_chunk_needed = bare_min_extra_bytes <= 64 99 | var tail_bytes = big_endian_qword_to_bytes(length * 8) 100 | if only_one_chunk_needed: 101 | while 8+extra_space.current_size < 64: 102 | 
extra_space.append(0) 103 | else: 104 | while 8+extra_space.current_size < 128: 105 | extra_space.append(0) 106 | 107 | for i in range(8): 108 | extra_space.append(tail_bytes[i]) 109 | 110 | 111 | var w = InlinedFixedVector[UInt32, 64](64) 112 | # (The initial values in w[0..63] don't matter, so many implementations zero them here) 113 | for i in range(64): 114 | w.append(0) 115 | 116 | 117 | # loop through the full sets of 64 from the byte view 118 | # later, a little code duplication to repeat on the extra space 119 | for chunk_number in range(exact_chunks): 120 | # create a 64-entry message schedule array w[0..63] of 32-bit words 121 | 122 | # copy chunk into first 16 words w[0..15] of the message schedule array 123 | @parameter 124 | for dword_i in range(16): 125 | var start_byte_within_chunk = dword_i * 4 126 | var start_byte_overall = start_byte_within_chunk + (64 * chunk_number) 127 | var i = start_byte_overall 128 | var dword = big_endian_bytes_to_dword( 129 | byte_view[i], 130 | byte_view[i + 1], 131 | byte_view[i + 2], 132 | byte_view[i + 3], 133 | ) 134 | w[dword_i] = dword 135 | 136 | # Extend the first 16 words into the remaining 48 words w[16..63] of the message schedule array: 137 | @parameter 138 | for i in range(16, 64): 139 | # s0 := (w[i-15] rightrotate 7) xor (w[i-15] rightrotate 18) xor (w[i-15] rightshift 3) 140 | var s0 = bitrr(w[i - 15], 7) ^ bitrr(w[i - 15], 18) ^ (w[i - 15] >> 3) 141 | # s1 := (w[i-2] rightrotate 17) xor (w[i-2] rightrotate 19) xor (w[i-2] rightshift 10) 142 | var s1 = bitrr(w[i - 2], 17) ^ bitrr(w[i - 2], 19) ^ (w[i - 2] >> 10) 143 | # w[i] := w[i-16] + s0 + w[i-7] + s1 144 | w[i] = w[i - 16] + s0 + w[i - 7] + s1 145 | 146 | var a = h0 147 | var b = h1 148 | var c = h2 149 | var d = h3 150 | var e = h4 151 | var f = h5 152 | var g = h6 153 | var h = h7 154 | 155 | @parameter 156 | for i in range(64): 157 | # S1 := (e rightrotate 6) xor (e rightrotate 11) xor (e rightrotate 25) 158 | var S1 = bitrr(e, 6) ^ bitrr(e, 11) ^ 
bitrr(e, 25) 159 | # ch := (e and f) xor ((not e) and g) 160 | var ch = (e & f) ^ ((e ^ (0-1)) & g) 161 | # temp1 := h + S1 + ch + k[i] + w[i] 162 | var temp1 = h + S1 + ch + k[i] + w[i] 163 | # S0 := (a rightrotate 2) xor (a rightrotate 13) xor (a rightrotate 22) 164 | var S0 = bitrr(a, 2) ^ bitrr(a, 13) ^ bitrr(a, 22) 165 | # maj := (a and b) xor (a and c) xor (b and c) 166 | var maj = (a & b) ^ (a & c) ^ (b & c) 167 | # temp2 := S0 + maj 168 | var temp2 = S0 + maj 169 | 170 | h = g 171 | g = f 172 | f = e 173 | e = d + temp1 174 | d = c 175 | c = b 176 | b = a 177 | a = temp1 + temp2 178 | 179 | h0 = h0 + a 180 | h1 = h1 + b 181 | h2 = h2 + c 182 | h3 = h3 + d 183 | h4 = h4 + e 184 | h5 = h5 + f 185 | h6 = h6 + g 186 | h7 = h7 + h 187 | 188 | #continue through the extra space 189 | var extra_chunks = extra_space.current_size // 64 190 | for chunk_number in range(extra_chunks): 191 | # create a 64-entry message schedule array w[0..63] of 32-bit words 192 | 193 | # copy chunk into first 16 words w[0..15] of the message schedule array 194 | @parameter 195 | for dword_i in range(16): 196 | var start_byte_within_chunk = dword_i * 4 197 | var start_byte_overall = start_byte_within_chunk + (64 * chunk_number) 198 | var i = start_byte_overall 199 | var dword = big_endian_bytes_to_dword( 200 | extra_space[i], 201 | extra_space[i + 1], 202 | extra_space[i + 2], 203 | extra_space[i + 3], 204 | ) 205 | w[dword_i] = dword 206 | 207 | # Extend the first 16 words into the remaining 48 words w[16..63] of the message schedule array: 208 | @parameter 209 | for i in range(16, 64): 210 | # s0 := (w[i-15] rightrotate 7) xor (w[i-15] rightrotate 18) xor (w[i-15] rightshift 3) 211 | var s0 = bitrr(w[i - 15], 7) ^ bitrr(w[i - 15], 18) ^ (w[i - 15] >> 3) 212 | # s1 := (w[i-2] rightrotate 17) xor (w[i-2] rightrotate 19) xor (w[i-2] rightshift 10) 213 | var s1 = bitrr(w[i - 2], 17) ^ bitrr(w[i - 2], 19) ^ (w[i - 2] >> 10) 214 | # w[i] := w[i-16] + s0 + w[i-7] + s1 215 | w[i] = w[i - 16] + 
s0 + w[i - 7] + s1 216 | 217 | var a = h0 218 | var b = h1 219 | var c = h2 220 | var d = h3 221 | var e = h4 222 | var f = h5 223 | var g = h6 224 | var h = h7 225 | 226 | @parameter 227 | for i in range(64): 228 | # S1 := (e rightrotate 6) xor (e rightrotate 11) xor (e rightrotate 25) 229 | var S1 = bitrr(e, 6) ^ bitrr(e, 11) ^ bitrr(e, 25) 230 | # ch := (e and f) xor ((not e) and g) 231 | var ch = (e & f) ^ ((e ^ (0-1)) & g) 232 | # temp1 := h + S1 + ch + k[i] + w[i] 233 | var temp1 = h + S1 + ch + k[i] + w[i] 234 | # S0 := (a rightrotate 2) xor (a rightrotate 13) xor (a rightrotate 22) 235 | var S0 = bitrr(a, 2) ^ bitrr(a, 13) ^ bitrr(a, 22) 236 | # maj := (a and b) xor (a and c) xor (b and c) 237 | var maj = (a & b) ^ (a & c) ^ (b & c) 238 | # temp2 := S0 + maj 239 | var temp2 = S0 + maj 240 | 241 | h = g 242 | g = f 243 | f = e 244 | e = d + temp1 245 | d = c 246 | c = b 247 | b = a 248 | a = temp1 + temp2 249 | 250 | h0 = h0 + a 251 | h1 = h1 + b 252 | h2 = h2 + c 253 | h3 = h3 + d 254 | h4 = h4 + e 255 | h5 = h5 + f 256 | h6 = h6 + g 257 | h7 = h7 + h 258 | 259 | 260 | 261 | var output = InlinedFixedVector[UInt8, 32](32) 262 | 263 | var digest_part_h0 = big_endian_dword_to_bytes(h0) 264 | for i in range(4): 265 | output.append(digest_part_h0[i]) 266 | var digest_part_h1 = big_endian_dword_to_bytes(h1) 267 | for i in range(4): 268 | output.append(digest_part_h1[i]) 269 | var digest_part_h2 = big_endian_dword_to_bytes(h2) 270 | for i in range(4): 271 | output.append(digest_part_h2[i]) 272 | var digest_part_h3 = big_endian_dword_to_bytes(h3) 273 | for i in range(4): 274 | output.append(digest_part_h3[i]) 275 | var digest_part_h4 = big_endian_dword_to_bytes(h4) 276 | for i in range(4): 277 | output.append(digest_part_h4[i]) 278 | var digest_part_h5 = big_endian_dword_to_bytes(h5) 279 | for i in range(4): 280 | output.append(digest_part_h5[i]) 281 | var digest_part_h6 = big_endian_dword_to_bytes(h6) 282 | for i in range(4): 283 | output.append(digest_part_h6[i]) 
284 | var digest_part_h7 = big_endian_dword_to_bytes(h7) 285 | for i in range(4): 286 | output.append(digest_part_h7[i]) 287 | 288 | return output 289 | -------------------------------------------------------------------------------- /test_sha256_2.mojo: -------------------------------------------------------------------------------- 1 | # from memory import memcpy 2 | # from collections.vector import InlinedFixedVector 3 | # import time 4 | 5 | 6 | # fn main(): 7 | # var bytes = 1024 * 1024 * 256 + 78 8 | # var bytes_to_hash: List[UInt8] = kinda_random_bytes(bytes) 9 | # var ptr = bytes_to_hash.unsafe_ptr() 10 | # var buffer = Buffer[DType.uint8](ptr, bytes_to_hash.size) 11 | # var before = time.now() 12 | # var hash = sha256(buffer) 13 | # var after = time.now() 14 | # var keep_vector_alive = bytes_to_hash[4] 15 | # var ns = after - before 16 | # var seconds = ns / 1_000_000_000 17 | # var megabytes = bytes / 1_000_000 18 | # for i in range(hash.size): 19 | # print(hash[i]) 20 | # print("megabytes per second") 21 | # print(megabytes / seconds) 22 | 23 | 24 | # fn kinda_random_bytes(length: Int) -> DynamicVector[UInt8]: 25 | # var vec = DynamicVector[UInt8](capacity=length) 26 | # var n: UInt8 = 245 27 | # var cycle: UInt8 = 1 28 | # for i in range(length): 29 | # var shifted = n >> 3 30 | # var shiftalso = n << 4 31 | # var more = shifted ^ n ^ shiftalso 32 | # var next = n + more 33 | # n = next 34 | # cycle ^= n 35 | # vec.append(n + cycle) 36 | 37 | # return vec 38 | 39 | 40 | # @always_inline 41 | # fn big_endian_bytes_to_dword( 42 | # first: UInt8, second: UInt8, third: UInt8, fourth: UInt8 43 | # ) -> UInt32: 44 | # var a = first.cast[DType.uint32]() << 24 45 | # var b = second.cast[DType.uint32]() << 16 46 | # var c = third.cast[DType.uint32]() << 8 47 | # var d = fourth.cast[DType.uint32]() << 0 48 | # return a | b | c | d 49 | 50 | 51 | # @always_inline 52 | # fn big_endian_dword_to_bytes(word: UInt32) -> InlinedFixedVector[UInt8, 4]: 53 | # var v = 
InlinedFixedVector[UInt8, 4](4) 54 | # var a = (word >> 24) & 255 55 | # var b = (word >> 16) & 255 56 | # var c = (word >> 8) & 255 57 | # var d = word & 255 58 | # v.append(a.cast[DType.uint8]()) 59 | # v.append(b.cast[DType.uint8]()) 60 | # v.append(c.cast[DType.uint8]()) 61 | # v.append(d.cast[DType.uint8]()) 62 | # return v 63 | 64 | 65 | # @always_inline 66 | # fn big_endian_qword_to_bytes(word: UInt64) -> InlinedFixedVector[UInt8, 8]: 67 | # var v = InlinedFixedVector[UInt8, 8](8) 68 | # var a = (word >> 56) & 255 69 | # var b = (word >> 48) & 255 70 | # var c = (word >> 40) & 255 71 | # var d = (word >> 32) & 255 72 | # var e = (word >> 24) & 255 73 | # var f = (word >> 16) & 255 74 | # var g = (word >> 8) & 255 75 | # var h = word & 255 76 | # v.append(a.cast[DType.uint8]()) 77 | # v.append(b.cast[DType.uint8]()) 78 | # v.append(c.cast[DType.uint8]()) 79 | # v.append(d.cast[DType.uint8]()) 80 | # v.append(e.cast[DType.uint8]()) 81 | # v.append(f.cast[DType.uint8]()) 82 | # v.append(g.cast[DType.uint8]()) 83 | # v.append(h.cast[DType.uint8]()) 84 | # return v 85 | 86 | 87 | # # bit rotate right 88 | # @always_inline 89 | # fn bitrr(integer: UInt32, rotations: UInt32) -> UInt32: 90 | # return (integer >> rotations) | (integer << (32 - rotations)) 91 | 92 | 93 | # # for reference see https://en.wikipedia.org/wiki/SHA-2#Pseudocode 94 | # # right now it internally copies the byte_view into a dynamic vector and works on that 95 | # # this is slow, but i don't have the mojo mojo to chunk it out for zero-copy 96 | # fn sha256(byte_view: Buffer[_, DType.uint8, 0]) -> InlinedFixedVector[UInt8, 32]: 97 | # var k = InlinedFixedVector[UInt32, 64](64) 98 | # k.append(0x428A2F98) 99 | # k.append(0x71374491) 100 | # k.append(0xB5C0FBCF) 101 | # k.append(0xE9B5DBA5) 102 | # k.append(0x3956C25B) 103 | # k.append(0x59F111F1) 104 | # k.append(0x923F82A4) 105 | # k.append(0xAB1C5ED5) 106 | # k.append(0xD807AA98) 107 | # k.append(0x12835B01) 108 | # k.append(0x243185BE) 109 | # 
k.append(0x550C7DC3) 110 | # k.append(0x72BE5D74) 111 | # k.append(0x80DEB1FE) 112 | # k.append(0x9BDC06A7) 113 | # k.append(0xC19BF174) 114 | # k.append(0xE49B69C1) 115 | # k.append(0xEFBE4786) 116 | # k.append(0x0FC19DC6) 117 | # k.append(0x240CA1CC) 118 | # k.append(0x2DE92C6F) 119 | # k.append(0x4A7484AA) 120 | # k.append(0x5CB0A9DC) 121 | # k.append(0x76F988DA) 122 | # k.append(0x983E5152) 123 | # k.append(0xA831C66D) 124 | # k.append(0xB00327C8) 125 | # k.append(0xBF597FC7) 126 | # k.append(0xC6E00BF3) 127 | # k.append(0xD5A79147) 128 | # k.append(0x06CA6351) 129 | # k.append(0x14292967) 130 | # k.append(0x27B70A85) 131 | # k.append(0x2E1B2138) 132 | # k.append(0x4D2C6DFC) 133 | # k.append(0x53380D13) 134 | # k.append(0x650A7354) 135 | # k.append(0x766A0ABB) 136 | # k.append(0x81C2C92E) 137 | # k.append(0x92722C85) 138 | # k.append(0xA2BFE8A1) 139 | # k.append(0xA81A664B) 140 | # k.append(0xC24B8B70) 141 | # k.append(0xC76C51A3) 142 | # k.append(0xD192E819) 143 | # k.append(0xD6990624) 144 | # k.append(0xF40E3585) 145 | # k.append(0x106AA070) 146 | # k.append(0x19A4C116) 147 | # k.append(0x1E376C08) 148 | # k.append(0x2748774C) 149 | # k.append(0x34B0BCB5) 150 | # k.append(0x391C0CB3) 151 | # k.append(0x4ED8AA4A) 152 | # k.append(0x5B9CCA4F) 153 | # k.append(0x682E6FF3) 154 | # k.append(0x748F82EE) 155 | # k.append(0x78A5636F) 156 | # k.append(0x84C87814) 157 | # k.append(0x8CC70208) 158 | # k.append(0x90BEFFFA) 159 | # k.append(0xA4506CEB) 160 | # k.append(0xBEF9A3F7) 161 | # k.append(0xC67178F2) 162 | 163 | # var h0: UInt32 = 0x6A09E667 164 | # var h1: UInt32 = 0xBB67AE85 165 | # var h2: UInt32 = 0x3C6EF372 166 | # var h3: UInt32 = 0xA54FF53A 167 | # var h4: UInt32 = 0x510E527F 168 | # var h5: UInt32 = 0x9B05688C 169 | # var h6: UInt32 = 0x1F83D9AB 170 | # var h7: UInt32 = 0x5BE0CD19 171 | 172 | # var one_bit: UInt8 = 0b1000_0000 173 | 174 | # var exact_chunks = byte_view.dynamic_size // 64 175 | # var remainder_start = exact_chunks * 64 176 | # var 
remainder_length = byte_view.dynamic_size % 64 177 | # var bare_min_extra_bytes = remainder_length + 9 178 | # var extra_space = InlinedFixedVector[UInt8,128](128) 179 | # for i in range(remainder_length): 180 | # extra_space.append(byte_view[remainder_start + i]) 181 | 182 | # extra_space.append(one_bit) 183 | # var only_one_chunk_needed = bare_min_extra_bytes <= 64 184 | # var tail_bytes = big_endian_qword_to_bytes(byte_view.dynamic_size * 8) 185 | # if only_one_chunk_needed: 186 | # while 8+extra_space.current_size < 64: 187 | # extra_space.append(0) 188 | # else: 189 | # while 8+extra_space.current_size < 128: 190 | # extra_space.append(0) 191 | 192 | # for i in range(8): 193 | # extra_space.append(tail_bytes[i]) 194 | 195 | 196 | # var w = InlinedFixedVector[UInt32, 64](64) 197 | # # (The initial values in w[0..63] don't matter, so many implementations zero them here) 198 | # for i in range(64): 199 | # w.append(0) 200 | 201 | 202 | # # loop through the full sets of 64 from the byte view 203 | # # later, a little code duplication to repeat on the extra space 204 | # for chunk_number in range(exact_chunks): 205 | # # create a 64-entry message schedule array w[0..63] of 32-bit words 206 | 207 | # # copy chunk into first 16 words w[0..15] of the message schedule array 208 | # @unroll 209 | # for dword_i in range(16): 210 | # var start_byte_within_chunk = dword_i * 4 211 | # var start_byte_overall = start_byte_within_chunk + (64 * chunk_number) 212 | # var i = start_byte_overall 213 | # var dword = big_endian_bytes_to_dword( 214 | # byte_view[i], 215 | # byte_view[i + 1], 216 | # byte_view[i + 2], 217 | # byte_view[i + 3], 218 | # ) 219 | # w[dword_i] = dword 220 | 221 | # # Extend the first 16 words into the remaining 48 words w[16..63] of the message schedule array: 222 | # @unroll 223 | # for i in range(16, 64): 224 | # # s0 := (w[i-15] rightrotate 7) xor (w[i-15] rightrotate 18) xor (w[i-15] rightshift 3) 225 | # var s0 = bitrr(w[i - 15], 7) ^ bitrr(w[i - 15], 
18) ^ (w[i - 15] >> 3) 226 | # # s1 := (w[i-2] rightrotate 17) xor (w[i-2] rightrotate 19) xor (w[i-2] rightshift 10) 227 | # var s1 = bitrr(w[i - 2], 17) ^ bitrr(w[i - 2], 19) ^ (w[i - 2] >> 10) 228 | # # w[i] := w[i-16] + s0 + w[i-7] + s1 229 | # w[i] = w[i - 16] + s0 + w[i - 7] + s1 230 | 231 | # var a = h0 232 | # var b = h1 233 | # var c = h2 234 | # var d = h3 235 | # var e = h4 236 | # var f = h5 237 | # var g = h6 238 | # var h = h7 239 | 240 | # @unroll 241 | # for i in range(64): 242 | # # S1 := (e rightrotate 6) xor (e rightrotate 11) xor (e rightrotate 25) 243 | # var S1 = bitrr(e, 6) ^ bitrr(e, 11) ^ bitrr(e, 25) 244 | # # ch := (e and f) xor ((not e) and g) 245 | # var ch = (e & f) ^ ((e ^ (0-1)) & g) 246 | # # temp1 := h + S1 + ch + k[i] + w[i] 247 | # var temp1 = h + S1 + ch + k[i] + w[i] 248 | # # S0 := (a rightrotate 2) xor (a rightrotate 13) xor (a rightrotate 22) 249 | # var S0 = bitrr(a, 2) ^ bitrr(a, 13) ^ bitrr(a, 22) 250 | # # maj := (a and b) xor (a and c) xor (b and c) 251 | # var maj = (a & b) ^ (a & c) ^ (b & c) 252 | # # temp2 := S0 + maj 253 | # var temp2 = S0 + maj 254 | 255 | # h = g 256 | # g = f 257 | # f = e 258 | # e = d + temp1 259 | # d = c 260 | # c = b 261 | # b = a 262 | # a = temp1 + temp2 263 | 264 | # h0 = h0 + a 265 | # h1 = h1 + b 266 | # h2 = h2 + c 267 | # h3 = h3 + d 268 | # h4 = h4 + e 269 | # h5 = h5 + f 270 | # h6 = h6 + g 271 | # h7 = h7 + h 272 | 273 | # #continue through the extra space 274 | # var extra_chunks = extra_space.current_size // 64 275 | # for chunk_number in range(extra_chunks): 276 | # # create a 64-entry message schedule array w[0..63] of 32-bit words 277 | 278 | # # copy chunk into first 16 words w[0..15] of the message schedule array 279 | # @unroll 280 | # for dword_i in range(16): 281 | # var start_byte_within_chunk = dword_i * 4 282 | # var start_byte_overall = start_byte_within_chunk + (64 * chunk_number) 283 | # var i = start_byte_overall 284 | # var dword = big_endian_bytes_to_dword( 285 
| # extra_space[i], 286 | # extra_space[i + 1], 287 | # extra_space[i + 2], 288 | # extra_space[i + 3], 289 | # ) 290 | # w[dword_i] = dword 291 | 292 | # # Extend the first 16 words into the remaining 48 words w[16..63] of the message schedule array: 293 | # @unroll 294 | # for i in range(16, 64): 295 | # # s0 := (w[i-15] rightrotate 7) xor (w[i-15] rightrotate 18) xor (w[i-15] rightshift 3) 296 | # var s0 = bitrr(w[i - 15], 7) ^ bitrr(w[i - 15], 18) ^ (w[i - 15] >> 3) 297 | # # s1 := (w[i-2] rightrotate 17) xor (w[i-2] rightrotate 19) xor (w[i-2] rightshift 10) 298 | # var s1 = bitrr(w[i - 2], 17) ^ bitrr(w[i - 2], 19) ^ (w[i - 2] >> 10) 299 | # # w[i] := w[i-16] + s0 + w[i-7] + s1 300 | # w[i] = w[i - 16] + s0 + w[i - 7] + s1 301 | 302 | # var a = h0 303 | # var b = h1 304 | # var c = h2 305 | # var d = h3 306 | # var e = h4 307 | # var f = h5 308 | # var g = h6 309 | # var h = h7 310 | 311 | # @unroll 312 | # for i in range(64): 313 | # # S1 := (e rightrotate 6) xor (e rightrotate 11) xor (e rightrotate 25) 314 | # var S1 = bitrr(e, 6) ^ bitrr(e, 11) ^ bitrr(e, 25) 315 | # # ch := (e and f) xor ((not e) and g) 316 | # var ch = (e & f) ^ ((e ^ (0-1)) & g) 317 | # # temp1 := h + S1 + ch + k[i] + w[i] 318 | # var temp1 = h + S1 + ch + k[i] + w[i] 319 | # # S0 := (a rightrotate 2) xor (a rightrotate 13) xor (a rightrotate 22) 320 | # var S0 = bitrr(a, 2) ^ bitrr(a, 13) ^ bitrr(a, 22) 321 | # # maj := (a and b) xor (a and c) xor (b and c) 322 | # var maj = (a & b) ^ (a & c) ^ (b & c) 323 | # # temp2 := S0 + maj 324 | # var temp2 = S0 + maj 325 | 326 | # h = g 327 | # g = f 328 | # f = e 329 | # e = d + temp1 330 | # d = c 331 | # c = b 332 | # b = a 333 | # a = temp1 + temp2 334 | 335 | # h0 = h0 + a 336 | # h1 = h1 + b 337 | # h2 = h2 + c 338 | # h3 = h3 + d 339 | # h4 = h4 + e 340 | # h5 = h5 + f 341 | # h6 = h6 + g 342 | # h7 = h7 + h 343 | 344 | 345 | 346 | # var output = InlinedFixedVector[UInt8, 32](32) 347 | 348 | # var digest_part_h0 = 
big_endian_dword_to_bytes(h0) 349 | # for i in range(4): 350 | # output.append(digest_part_h0[i]) 351 | # var digest_part_h1 = big_endian_dword_to_bytes(h1) 352 | # for i in range(4): 353 | # output.append(digest_part_h1[i]) 354 | # var digest_part_h2 = big_endian_dword_to_bytes(h2) 355 | # for i in range(4): 356 | # output.append(digest_part_h2[i]) 357 | # var digest_part_h3 = big_endian_dword_to_bytes(h3) 358 | # for i in range(4): 359 | # output.append(digest_part_h3[i]) 360 | # var digest_part_h4 = big_endian_dword_to_bytes(h4) 361 | # for i in range(4): 362 | # output.append(digest_part_h4[i]) 363 | # var digest_part_h5 = big_endian_dword_to_bytes(h5) 364 | # for i in range(4): 365 | # output.append(digest_part_h5[i]) 366 | # var digest_part_h6 = big_endian_dword_to_bytes(h6) 367 | # for i in range(4): 368 | # output.append(digest_part_h6[i]) 369 | # var digest_part_h7 = big_endian_dword_to_bytes(h7) 370 | # for i in range(4): 371 | # output.append(digest_part_h7[i]) 372 | 373 | # return output 374 | -------------------------------------------------------------------------------- /benchmark_other_languages/c/hash_functions/wyhash.h: -------------------------------------------------------------------------------- 1 | // This is free and unencumbered software released into the public domain under The Unlicense (http://unlicense.org/) 2 | // main repo: https://github.com/wangyi-fudan/wyhash 3 | // author: 王一 Wang Yi 4 | // contributors: Reini Urban, Dietrich Epp, Joshua Haberman, Tommy Ettinger, Daniel Lemire, Otmar Ertl, cocowalla, leo-yuriev, Diego Barrios Romero, paulie-g, dumblob, Yann Collet, ivte-ms, hyb, James Z.M. 
Gao, easyaspi314 (Devin), TheOneric 5 | 6 | /* quick example: 7 | string s="fjsakfdsjkf"; 8 | uint64_t hash=wyhash(s.c_str(), s.size(), 0, _wyp); 9 | */ 10 | 11 | #ifndef wyhash_final_version_4_2 12 | #define wyhash_final_version_4_2 13 | 14 | #ifndef WYHASH_CONDOM 15 | //protections that produce different results: 16 | //1: normal valid behavior 17 | //2: extra protection against entropy loss (probability=2^-63), aka. "blind multiplication" 18 | #define WYHASH_CONDOM 1 19 | #endif 20 | 21 | #ifndef WYHASH_32BIT_MUM 22 | //0: normal version, slow on 32 bit systems 23 | //1: faster on 32 bit systems but produces different results, incompatible with wy2u0k function 24 | #define WYHASH_32BIT_MUM 0 25 | #endif 26 | 27 | //includes 28 | #include 29 | #include 30 | #if defined(_MSC_VER) && defined(_M_X64) 31 | #include 32 | #pragma intrinsic(_umul128) 33 | #endif 34 | 35 | //likely and unlikely macros 36 | #if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) 37 | #define _likely_(x) __builtin_expect(x,1) 38 | #define _unlikely_(x) __builtin_expect(x,0) 39 | #else 40 | #define _likely_(x) (x) 41 | #define _unlikely_(x) (x) 42 | #endif 43 | 44 | //128bit multiply function 45 | static inline uint64_t _wyrot(uint64_t x) { return (x>>32)|(x<<32); } 46 | static inline void _wymum(uint64_t *A, uint64_t *B){ 47 | #if(WYHASH_32BIT_MUM) 48 | uint64_t hh=(*A>>32)*(*B>>32), hl=(*A>>32)*(uint32_t)*B, lh=(uint32_t)*A*(*B>>32), ll=(uint64_t)(uint32_t)*A*(uint32_t)*B; 49 | #if(WYHASH_CONDOM>1) 50 | *A^=_wyrot(hl)^hh; *B^=_wyrot(lh)^ll; 51 | #else 52 | *A=_wyrot(hl)^hh; *B=_wyrot(lh)^ll; 53 | #endif 54 | #elif defined(__SIZEOF_INT128__) 55 | __uint128_t r=*A; r*=*B; 56 | #if(WYHASH_CONDOM>1) 57 | *A^=(uint64_t)r; *B^=(uint64_t)(r>>64); 58 | #else 59 | *A=(uint64_t)r; *B=(uint64_t)(r>>64); 60 | #endif 61 | #elif defined(_MSC_VER) && defined(_M_X64) 62 | #if(WYHASH_CONDOM>1) 63 | uint64_t a, b; 64 | a=_umul128(*A,*B,&b); 65 | *A^=a; *B^=b; 66 | #else 67 | 
*A=_umul128(*A,*B,B); 68 | #endif 69 | #else 70 | uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo; 71 | uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t>32)+(rm1>>32)+c; 73 | #if(WYHASH_CONDOM>1) 74 | *A^=lo; *B^=hi; 75 | #else 76 | *A=lo; *B=hi; 77 | #endif 78 | #endif 79 | } 80 | 81 | //multiply and xor mix function, aka MUM 82 | static inline uint64_t _wymix(uint64_t A, uint64_t B){ _wymum(&A,&B); return A^B; } 83 | 84 | //endian macros 85 | #ifndef WYHASH_LITTLE_ENDIAN 86 | #if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) 87 | #define WYHASH_LITTLE_ENDIAN 1 88 | #elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) 89 | #define WYHASH_LITTLE_ENDIAN 0 90 | #else 91 | #warning could not determine endianness! Falling back to little endian. 92 | #define WYHASH_LITTLE_ENDIAN 1 93 | #endif 94 | #endif 95 | 96 | //read functions 97 | #if (WYHASH_LITTLE_ENDIAN) 98 | static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v;} 99 | static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return v;} 100 | #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) 101 | static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v);} 102 | static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return __builtin_bswap32(v);} 103 | #elif defined(_MSC_VER) 104 | static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);} 105 | static inline uint64_t _wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return _byteswap_ulong(v);} 106 | #else 107 | static inline uint64_t _wyr8(const uint8_t *p) { 108 | uint64_t v; memcpy(&v, p, 8); 109 | return (((v >> 56) & 0xff)| ((v >> 40) & 0xff00)| ((v >> 24) & 0xff0000)| ((v >> 8) & 
0xff000000)| ((v << 8) & 0xff00000000)| ((v << 24) & 0xff0000000000)| ((v << 40) & 0xff000000000000)| ((v << 56) & 0xff00000000000000)); 110 | } 111 | static inline uint64_t _wyr4(const uint8_t *p) { 112 | uint32_t v; memcpy(&v, p, 4); 113 | return (((v >> 24) & 0xff)| ((v >> 8) & 0xff00)| ((v << 8) & 0xff0000)| ((v << 24) & 0xff000000)); 114 | } 115 | #endif 116 | static inline uint64_t _wyr3(const uint8_t *p, size_t k) { return (((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1];} 117 | //wyhash main function 118 | static inline uint64_t wyhash(const void *key, size_t len, uint64_t seed, const uint64_t *secret){ 119 | const uint8_t *p=(const uint8_t *)key; seed^=_wymix(seed^secret[0],secret[1]); uint64_t a, b; 120 | if(_likely_(len<=16)){ 121 | if(_likely_(len>=4)){ a=(_wyr4(p)<<32)|_wyr4(p+((len>>3)<<2)); b=(_wyr4(p+len-4)<<32)|_wyr4(p+len-4-((len>>3)<<2)); } 122 | else if(_likely_(len>0)){ a=_wyr3(p,len); b=0;} 123 | else a=b=0; 124 | } 125 | else{ 126 | size_t i=len; 127 | if(_unlikely_(i>=48)){ 128 | uint64_t see1=seed, see2=seed; 129 | do{ 130 | seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed); 131 | see1=_wymix(_wyr8(p+16)^secret[2],_wyr8(p+24)^see1); 132 | see2=_wymix(_wyr8(p+32)^secret[3],_wyr8(p+40)^see2); 133 | p+=48; i-=48; 134 | }while(_likely_(i>=48)); 135 | seed^=see1^see2; 136 | } 137 | while(_unlikely_(i>16)){ seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed); i-=16; p+=16; } 138 | a=_wyr8(p+i-16); b=_wyr8(p+i-8); 139 | } 140 | a^=secret[1]; b^=seed; _wymum(&a,&b); 141 | return _wymix(a^secret[0]^len,b^secret[1]); 142 | } 143 | 144 | //the default secret parameters 145 | static const uint64_t _wyp[4] = {0x2d358dccaa6c78a5ull, 0x8bb84b93962eacc9ull, 0x4b33a62ed433d4a3ull, 0x4d5a2da51de1aa47ull}; 146 | 147 | //a useful 64bit-64bit mix function to produce deterministic pseudo random numbers that can pass BigCrush and PractRand 148 | static inline uint64_t wyhash64(uint64_t A, uint64_t B){ A^=0x2d358dccaa6c78a5ull; B^=0x8bb84b93962eacc9ull; 
_wymum(&A,&B); return _wymix(A^0x2d358dccaa6c78a5ull,B^0x8bb84b93962eacc9ull);} 149 | 150 | //The wyrand PRNG that pass BigCrush and PractRand 151 | static inline uint64_t wyrand(uint64_t *seed){ *seed+=0x2d358dccaa6c78a5ull; return _wymix(*seed,*seed^0x8bb84b93962eacc9ull);} 152 | 153 | //convert any 64 bit pseudo random numbers to uniform distribution [0,1). It can be combined with wyrand, wyhash64 or wyhash. 154 | static inline double wy2u01(uint64_t r){ const double _wynorm=1.0/(1ull<<52); return (r>>12)*_wynorm;} 155 | 156 | //convert any 64 bit pseudo random numbers to APPROXIMATE Gaussian distribution. It can be combined with wyrand, wyhash64 or wyhash. 157 | static inline double wy2gau(uint64_t r){ const double _wynorm=1.0/(1ull<<20); return ((r&0x1fffff)+((r>>21)&0x1fffff)+((r>>42)&0x1fffff))*_wynorm-3.0;} 158 | 159 | #ifdef WYTRNG 160 | #include 161 | //The wytrand true random number generator, passed BigCrush. 162 | static inline uint64_t wytrand(uint64_t *seed){ 163 | struct timeval t; gettimeofday(&t,0); 164 | uint64_t teed=(((uint64_t)t.tv_sec)<<32)|t.tv_usec; 165 | teed=_wymix(teed^_wyp[0],*seed^_wyp[1]); 166 | *seed=_wymix(teed^_wyp[0],_wyp[2]); 167 | return _wymix(*seed,*seed^_wyp[3]); 168 | } 169 | #endif 170 | 171 | #if(!WYHASH_32BIT_MUM) 172 | //fast range integer random number generation on [0,k) credit to Daniel Lemire. May not work when WYHASH_32BIT_MUM=1. It can be combined with wyrand, wyhash64 or wyhash. 
173 | static inline uint64_t wy2u0k(uint64_t r, uint64_t k){ _wymum(&r,&k); return k; } 174 | #endif 175 | 176 | // modified from https://github.com/going-digital/Prime64 177 | static inline unsigned long long mul_mod(unsigned long long a, unsigned long long b, unsigned long long m) { 178 | unsigned long long r=0; 179 | while (b) { 180 | if (b & 1) { 181 | unsigned long long r2 = r + a; 182 | if (r2 < r) r2 -= m; 183 | r = r2 % m; 184 | } 185 | b >>= 1; 186 | if (b) { 187 | unsigned long long a2 = a + a; 188 | if (a2 < a) a2 -= m; 189 | a = a2 % m; 190 | } 191 | } 192 | return r; 193 | } 194 | static inline unsigned long long pow_mod(unsigned long long a, unsigned long long b, unsigned long long m) { 195 | unsigned long long r=1; 196 | while (b) { 197 | if (b&1) r=mul_mod(r,a,m); 198 | b>>=1; 199 | if (b) a=mul_mod(a,a,m); 200 | } 201 | return r; 202 | } 203 | unsigned sprp(unsigned long long n, unsigned long long a) { 204 | unsigned long long d=n-1; 205 | unsigned char s=0; 206 | while (!(d & 0xff)) { d>>=8; s+=8; } 207 | if (!(d & 0xf)) { d>>=4; s+=4; } 208 | if (!(d & 0x3)) { d>>=2; s+=2; } 209 | if (!(d & 0x1)) { d>>=1; s+=1; } 210 | unsigned long long b=pow_mod(a,d,n); 211 | if ((b==1) || (b==(n-1))) return 1; 212 | unsigned char r; 213 | for (r=1; r> 1) & 0x5555555555555555; 256 | x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333); 257 | x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f; 258 | x = (x * 0x0101010101010101) >> 56; 259 | if(x!=32){ ok=0; break; } 260 | #endif 261 | } 262 | if(ok&&!is_prime(secret[i])) ok=0; 263 | }while(!ok); 264 | } 265 | } 266 | 267 | #endif 268 | 269 | /* The Unlicense 270 | This is free and unencumbered software released into the public domain. 271 | 272 | Anyone is free to copy, modify, publish, use, compile, sell, or 273 | distribute this software, either in source code form or as a compiled 274 | binary, for any purpose, commercial or non-commercial, and by any 275 | means. 
276 | 277 | In jurisdictions that recognize copyright laws, the author or authors 278 | of this software dedicate any and all copyright interest in the 279 | software to the public domain. We make this dedication for the benefit 280 | of the public at large and to the detriment of our heirs and 281 | successors. We intend this dedication to be an overt act of 282 | relinquishment in perpetuity of all present and future rights to this 283 | software under copyright law. 284 | 285 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 286 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 287 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 288 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 289 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 290 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 291 | OTHER DEALINGS IN THE SOFTWARE. 292 | 293 | For more information, please refer to <http://unlicense.org/> 294 | */ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mojo-hash 2 | A collection of hash functions implemented in Mojo. 3 | 4 | ## AHash 5 | Original repo: https://github.com/tkaitchuck/aHash 6 | Note: implements the fallback version (without AES-NI intrinsics use), uses folded multiply function without u128 support 7 | 8 | ## fnv1a 9 | Original repo: https://github.com/ziglang/zig/blob/master/lib/std/hash/fnv.zig 10 | Note: implements 32 and 64 bit variants 11 | 12 | ## fxhash 13 | Original repo: https://github.com/cbreeden/fxhash/tree/master 14 | Note: implements 32 and 64 bit variants 15 | 16 | ## Wyhash 17 | Original repo: https://github.com/wangyi-fudan/wyhash 18 | Note: `wymum` implemented as if `WYHASH_32BIT_MUM` is set and `WYHASH_CONDOM` not set. Little endian only. 
19 | 20 | ## Benchmark 21 | Collects the average hash function runtime in nanoseconds based on 7 different word collections. The average runtime is computed 20 times on each word collection; the fastest is kept as the final result. Shows collisions over the full 32/64 bit space and the modulo 512 (9 bit) space. 22 | 23 | ### Results 24 | 25 | CPU Specs: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz 26 | 27 | ``` 28 | Corpus 1 29 | Word count 100 | unique word count 82 | min key size 2 | avg key size 5.71 | max key size 12 30 | AHash avg hash compute 18.149999999999999 | hash colision 1.0 | hash colision mod 512 1.1549295774647887 31 | Wyhash avg hash compute 17.079999999999998 | hash colision 1.0 | hash colision mod 512 1.1232876712328768 32 | fnv1a32 avg hash compute 15.08 | hash colision 1.0 | hash colision mod 512 1.1232876712328768 33 | fnv1a64 avg hash compute 16.32 | hash colision 1.0 | hash colision mod 512 1.0249999999999999 34 | fxHash32 avg hash compute 12.539999999999999 | hash colision 1.0 | hash colision mod 512 1.2238805970149254 35 | fxHash64 avg hash compute 12.56 | hash colision 1.0 | hash colision mod 512 1.1884057971014492 36 | std_Hash64 avg hash compute 213.0 | hash colision 1.0 | hash colision mod 512 1.0512820512820513 37 | 38 | Corpus 2 39 | Word count 999 | unique word count 203 | min key size 1 | avg key size 4.8058058058058055 | max key size 14 40 | AHash avg hash compute 18.263263263263262 | hash colision 1.0 | hash colision mod 512 1.2083333333333333 41 | Wyhash avg hash compute 20.11011011011011 | hash colision 1.0 | hash colision mod 512 1.2303030303030302 42 | fnv1a32 avg hash compute 17.995995995995997 | hash colision 1.0 | hash colision mod 512 1.2848101265822784 43 | fnv1a64 avg hash compute 16.079079079079079 | hash colision 1.0 | hash colision mod 512 1.2011834319526626 44 | fxHash32 avg hash compute 14.397397397397397 | hash colision 1.0 | hash colision mod 512 1.3716216216216217 45 | fxHash64 avg hash compute 12.603603603603604 | hash colision
1.0 | hash colision mod 512 1.4195804195804196 46 | std_Hash64 avg hash compute 239.15815815815816 | hash colision 1.0 | hash colision mod 512 1.2303030303030302 47 | 48 | Corpus 3 49 | Word count 999 | unique word count 192 | min key size 1 | avg key size 4.293293293293293 | max key size 13 50 | AHash avg hash compute 16.716716716716718 | hash colision 1.0 | hash colision mod 512 1.1636363636363636 51 | Wyhash avg hash compute 16.952952952952952 | hash colision 1.0 | hash colision mod 512 1.2151898734177216 52 | fnv1a32 avg hash compute 15.968968968968969 | hash colision 1.0 | hash colision mod 512 1.1428571428571428 53 | fnv1a64 avg hash compute 18.862862862862862 | hash colision 1.0 | hash colision mod 512 1.2229299363057324 54 | fxHash32 avg hash compute 15.723723723723724 | hash colision 1.0 | hash colision mod 512 1.352112676056338 55 | fxHash64 avg hash compute 17.168168168168169 | hash colision 1.0 | hash colision mod 512 1.4436090225563909 56 | std_Hash64 avg hash compute 258.6146146146146 | hash colision 1.0 | hash colision mod 512 1.1779141104294479 57 | 58 | Corpus 4 59 | Word count 999 | unique word count 532 | min key size 2 | avg key size 10.646646646646646 | max key size 37 60 | AHash avg hash compute 20.205205205205207 | hash colision 1.0 | hash colision mod 512 1.5786350148367954 61 | Wyhash avg hash compute 20.234234234234233 | hash colision 1.0 | hash colision mod 512 1.5975975975975976 62 | fnv1a32 avg hash compute 21.814814814814813 | hash colision 1.0 | hash colision mod 512 1.6170212765957446 63 | fnv1a64 avg hash compute 24.41041041041041 | hash colision 1.0 | hash colision mod 512 1.5928143712574849 64 | fxHash32 avg hash compute 16.208208208208209 | hash colision 1.0 | hash colision mod 512 1.6677115987460815 65 | fxHash64 avg hash compute 15.890890890890891 | hash colision 1.0 | hash colision mod 512 1.9850746268656716 66 | std_Hash64 avg hash compute 218.3093093093093 | hash colision 1.0018832391713748 | hash colision mod 512 
1.6170212765957446 67 | 68 | Corpus 5 69 | Word count 999 | unique word count 208 | min key size 2 | avg key size 5.6496496496496498 | max key size 18 70 | AHash avg hash compute 15.921921921921921 | hash colision 1.0 | hash colision mod 512 1.1620111731843576 71 | Wyhash avg hash compute 19.517517517517518 | hash colision 1.0 | hash colision mod 512 1.1685393258426966 72 | fnv1a32 avg hash compute 17.042042042042041 | hash colision 1.0 | hash colision mod 512 1.2093023255813953 73 | fnv1a64 avg hash compute 18.58958958958959 | hash colision 1.0 | hash colision mod 512 1.2530120481927711 74 | fxHash32 avg hash compute 14.552552552552553 | hash colision 1.0 | hash colision mod 512 1.3506493506493507 75 | fxHash64 avg hash compute 14.527527527527528 | hash colision 1.0 | hash colision mod 512 1.3594771241830066 76 | std_Hash64 avg hash compute 239.1181181181181 | hash colision 1.0 | hash colision mod 512 1.2023121387283238 77 | 78 | Corpus 6 79 | Word count 10 | unique word count 10 | min key size 378 | avg key size 499.19999999999999 | max key size 558 80 | AHash avg hash compute 67.400000000000006 | hash colision 1.0 | hash colision mod 512 1.0 81 | Wyhash avg hash compute 64.200000000000003 | hash colision 1.0 | hash colision mod 512 1.0 82 | fnv1a32 avg hash compute 499.60000000000002 | hash colision 1.0 | hash colision mod 512 1.0 83 | fnv1a64 avg hash compute 620.70000000000005 | hash colision 1.0 | hash colision mod 512 1.0 84 | fxHash32 avg hash compute 163.80000000000001 | hash colision 1.0 | hash colision mod 512 1.0 85 | fxHash64 avg hash compute 87.799999999999997 | hash colision 1.0 | hash colision mod 512 1.0 86 | std_Hash64 avg hash compute 247.59999999999999 | hash colision 1.0 | hash colision mod 512 1.0 87 | 88 | Corpus 7 89 | Word count 161 | unique word count 143 | min key size 8 | avg key size 22.260869565217391 | max key size 43 90 | AHash avg hash compute 19.546583850931675 | hash colision 1.0 | hash colision mod 512 1.1259842519685039 91 | 
Wyhash avg hash compute 22.670807453416149 | hash colision 1.0 | hash colision mod 512 1.1439999999999999 92 | fnv1a32 avg hash compute 32.900621118012424 | hash colision 1.0 | hash colision mod 512 1.153225806451613 93 | fnv1a64 avg hash compute 38.391304347826086 | hash colision 1.0 | hash colision mod 512 1.1626016260162602 94 | fxHash32 avg hash compute 20.043478260869566 | hash colision 1.0 | hash colision mod 512 1.1259842519685039 95 | fxHash64 avg hash compute 19.503105590062113 | hash colision 1.0 | hash colision mod 512 1.153225806451613 96 | std_Hash64 avg hash compute 242.59006211180125 | hash colision 1.0 | hash colision mod 512 1.1626016260162602 97 | ``` 98 | 99 | MacMini M1, 2020 100 | 101 | ``` 102 | Corpus 1 103 | Word count 100 | unique word count 82 | min key size 2 | avg key size 5.71 | max key size 12 104 | AHash avg hash compute 19.0 | hash colision 1.0 | hash colision mod 512 1.1549295774647887 105 | Wyhash avg hash compute 29.5 | hash colision 1.0 | hash colision mod 512 1.1232876712328768 106 | fnv1a32 avg hash compute 18.5 | hash colision 1.0 | hash colision mod 512 1.1232876712328768 107 | fnv1a64 avg hash compute 17.5 | hash colision 1.0 | hash colision mod 512 1.0249999999999999 108 | fxHash32 avg hash compute 18.0 | hash colision 1.0 | hash colision mod 512 1.2238805970149254 109 | fxHash64 avg hash compute 19.5 | hash colision 1.0 | hash colision mod 512 1.1884057971014492 110 | std_Hash64 avg hash compute 84.5 | hash colision 1.0 | hash colision mod 512 1.0512820512820513 111 | 112 | Corpus 2 113 | Word count 999 | unique word count 203 | min key size 1 | avg key size 4.8058058058058055 | max key size 14 114 | AHash avg hash compute 17.567567567567568 | hash colision 1.0 | hash colision mod 512 1.2083333333333333 115 | Wyhash avg hash compute 25.925925925925927 | hash colision 1.0 | hash colision mod 512 1.2303030303030302 116 | fnv1a32 avg hash compute 19.96996996996997 | hash colision 1.0 | hash colision mod 512 1.2848101265822784 
117 | fnv1a64 avg hash compute 17.967967967967969 | hash colision 1.0 | hash colision mod 512 1.2011834319526626 118 | fxHash32 avg hash compute 16.016016016016017 | hash colision 1.0 | hash colision mod 512 1.3716216216216217 119 | fxHash64 avg hash compute 13.863863863863864 | hash colision 1.0 | hash colision mod 512 1.4195804195804196 120 | std_Hash64 avg hash compute 75.17517517517517 | hash colision 1.0 | hash colision mod 512 1.2303030303030302 121 | 122 | Corpus 3 123 | Word count 999 | unique word count 192 | min key size 1 | avg key size 4.293293293293293 | max key size 13 124 | AHash avg hash compute 18.468468468468469 | hash colision 1.0 | hash colision mod 512 1.1636363636363636 125 | Wyhash avg hash compute 24.474474474474473 | hash colision 1.0 | hash colision mod 512 1.2151898734177216 126 | fnv1a32 avg hash compute 19.81981981981982 | hash colision 1.0 | hash colision mod 512 1.1428571428571428 127 | fnv1a64 avg hash compute 17.417417417417418 | hash colision 1.0 | hash colision mod 512 1.2229299363057324 128 | fxHash32 avg hash compute 15.665665665665665 | hash colision 1.0 | hash colision mod 512 1.352112676056338 129 | fxHash64 avg hash compute 16.216216216216218 | hash colision 1.0 | hash colision mod 512 1.4436090225563909 130 | std_Hash64 avg hash compute 87.037037037037038 | hash colision 1.0 | hash colision mod 512 1.1779141104294479 131 | 132 | Corpus 4 133 | Word count 999 | unique word count 532 | min key size 2 | avg key size 10.646646646646646 | max key size 37 134 | AHash avg hash compute 19.51951951951952 | hash colision 1.0 | hash colision mod 512 1.5786350148367954 135 | Wyhash avg hash compute 24.874874874874873 | hash colision 1.0 | hash colision mod 512 1.5975975975975976 136 | fnv1a32 avg hash compute 25.575575575575577 | hash colision 1.0 | hash colision mod 512 1.6170212765957446 137 | fnv1a64 avg hash compute 24.274274274274273 | hash colision 1.0 | hash colision mod 512 1.5928143712574849 138 | fxHash32 avg hash compute 
15.665665665665665 | hash colision 1.0 | hash colision mod 512 1.6677115987460815 139 | fxHash64 avg hash compute 17.867867867867869 | hash colision 1.0 | hash colision mod 512 1.9850746268656716 140 | std_Hash64 avg hash compute 73.523523523523522 | hash colision 1.0018832391713748 | hash colision mod 512 1.5833333333333333 141 | 142 | Corpus 5 143 | Word count 999 | unique word count 208 | min key size 2 | avg key size 5.6496496496496498 | max key size 18 144 | AHash avg hash compute 17.817817817817819 | hash colision 1.0 | hash colision mod 512 1.1620111731843576 145 | Wyhash avg hash compute 26.576576576576578 | hash colision 1.0 | hash colision mod 512 1.1685393258426966 146 | fnv1a32 avg hash compute 19.76976976976977 | hash colision 1.0 | hash colision mod 512 1.2093023255813953 147 | fnv1a64 avg hash compute 18.918918918918919 | hash colision 1.0 | hash colision mod 512 1.2530120481927711 148 | fxHash32 avg hash compute 17.817817817817819 | hash colision 1.0 | hash colision mod 512 1.3506493506493507 149 | fxHash64 avg hash compute 15.715715715715715 | hash colision 1.0 | hash colision mod 512 1.3594771241830066 150 | std_Hash64 avg hash compute 75.425425425425431 | hash colision 1.0 | hash colision mod 512 1.2023121387283238 151 | 152 | Corpus 6 153 | Word count 10 | unique word count 10 | min key size 378 | avg key size 499.19999999999999 | max key size 558 154 | AHash avg hash compute 30.0 | hash colision 1.0 | hash colision mod 512 1.0 155 | Wyhash avg hash compute 90.0 | hash colision 1.0 | hash colision mod 512 1.0 156 | fnv1a32 avg hash compute 635.0 | hash colision 1.0 | hash colision mod 512 1.0 157 | fnv1a64 avg hash compute 660.0 | hash colision 1.0 | hash colision mod 512 1.0 158 | fxHash32 avg hash compute 250.0 | hash colision 1.0 | hash colision mod 512 1.0 159 | fxHash64 avg hash compute 145.0 | hash colision 1.0 | hash colision mod 512 1.0 160 | std_Hash64 avg hash compute 220.0 | hash colision 1.0 | hash colision mod 512 1.0 161 | 162 | 
Corpus 7 163 | Word count 161 | unique word count 143 | min key size 8 | avg key size 22.260869565217391 | max key size 43 164 | AHash avg hash compute 17.701863354037268 | hash colision 1.0 | hash colision mod 512 1.1259842519685039 165 | Wyhash avg hash compute 29.19254658385093 | hash colision 1.0 | hash colision mod 512 1.1439999999999999 166 | fnv1a32 avg hash compute 42.546583850931675 | hash colision 1.0 | hash colision mod 512 1.153225806451613 167 | fnv1a64 avg hash compute 39.440993788819874 | hash colision 1.0 | hash colision mod 512 1.1626016260162602 168 | fxHash32 avg hash compute 18.012422360248447 | hash colision 1.0 | hash colision mod 512 1.1259842519685039 169 | fxHash64 avg hash compute 19.875776397515526 | hash colision 1.0 | hash colision mod 512 1.153225806451613 170 | std_Hash64 avg hash compute 113.35403726708074 | hash colision 1.0 | hash colision mod 512 1.1259842519685039 171 | ``` 172 | 173 | ![Hash functions benchmark chart](images/hash_functions.png) 174 | 175 | ### Other languages benchmarks results: 176 | 177 | #### CPU Specs: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz 178 | 179 | **Rust** 180 | 181 | ``` 182 | Avg time Default, 16.622: total elements: 100, unique elements: 82, collisions: 1.0, collisions % 512: 1.0649350649350648, keys min: 2, avg: 6, max: 12 183 | Avg time FxHasher, 17.769000000000002: total elements: 100, unique elements: 82, collisions: 1.0, collisions % 512: 1.0933333333333333, keys min: 2, avg: 6, max: 12 184 | Avg time AHasher, 16.5035: total elements: 100, unique elements: 82, collisions: 1.0, collisions % 512: 1.1232876712328768, keys min: 2, avg: 6, max: 12 185 | Avg time WyHash, 17.2495: total elements: 100, unique elements: 82, collisions: 1.0, collisions % 512: 1.0933333333333333, keys min: 2, avg: 6, max: 12 186 | Avg time Default, 16.026176176176175: total elements: 999, unique elements: 203, collisions: 1.0, collisions % 512: 1.1871345029239766, keys min: 1, avg: 5, max: 14 187 | Avg time 
FxHasher, 16.14034034034034: total elements: 999, unique elements: 203, collisions: 1.0, collisions % 512: 1.2083333333333333, keys min: 1, avg: 5, max: 14 188 | Avg time AHasher, 16.455305305305306: total elements: 999, unique elements: 203, collisions: 1.0, collisions % 512: 1.2011834319526626, keys min: 1, avg: 5, max: 14 189 | Avg time WyHash, 17.575925925925926: total elements: 999, unique elements: 203, collisions: 1.0, collisions % 512: 1.2083333333333333, keys min: 1, avg: 5, max: 14 190 | Avg time Default, 16.30725725725726: total elements: 999, unique elements: 192, collisions: 1.0, collisions % 512: 1.238709677419355, keys min: 1, avg: 5, max: 13 191 | Avg time FxHasher, 17.076676676676676: total elements: 999, unique elements: 192, collisions: 1.0, collisions % 512: 1.263157894736842, keys min: 1, avg: 5, max: 13 192 | Avg time AHasher, 16.477227227227228: total elements: 999, unique elements: 192, collisions: 1.0, collisions % 512: 1.2229299363057324, keys min: 1, avg: 5, max: 13 193 | Avg time WyHash, 17.61991991991992: total elements: 999, unique elements: 192, collisions: 1.0, collisions % 512: 1.2075471698113207, keys min: 1, avg: 5, max: 13 194 | Avg time Default, 17.10630630630631: total elements: 999, unique elements: 532, collisions: 1.0, collisions % 512: 1.5647058823529412, keys min: 2, avg: 12, max: 37 195 | Avg time FxHasher, 18.07007007007007: total elements: 999, unique elements: 532, collisions: 1.0, collisions % 512: 1.6419753086419753, keys min: 2, avg: 12, max: 37 196 | Avg time AHasher, 17.31981981981982: total elements: 999, unique elements: 532, collisions: 1.0, collisions % 512: 1.636923076923077, keys min: 2, avg: 12, max: 37 197 | Avg time WyHash, 17.735835835835836: total elements: 999, unique elements: 532, collisions: 1.0, collisions % 512: 1.636923076923077, keys min: 2, avg: 12, max: 37 198 | Avg time Default, 16.716816816816817: total elements: 999, unique elements: 208, collisions: 1.0, collisions % 512: 
1.2606060606060605, keys min: 2, avg: 6, max: 18 199 | Avg time FxHasher, 17.642342342342342: total elements: 999, unique elements: 208, collisions: 1.0, collisions % 512: 1.2606060606060605, keys min: 2, avg: 6, max: 18 200 | Avg time AHasher, 16.40915915915916: total elements: 999, unique elements: 208, collisions: 1.0, collisions % 512: 1.1954022988505748, keys min: 2, avg: 6, max: 18 201 | Avg time WyHash, 17.5506006006006: total elements: 999, unique elements: 208, collisions: 1.0, collisions % 512: 1.1751412429378532, keys min: 2, avg: 6, max: 18 202 | Avg time Default, 126.03: total elements: 10, unique elements: 10, collisions: 1.0, collisions % 512: 1.0, keys min: 378, avg: 499, max: 558 203 | Avg time FxHasher, 93.1: total elements: 10, unique elements: 10, collisions: 1.0, collisions % 512: 1.0, keys min: 378, avg: 499, max: 558 204 | Avg time AHasher, 48.14: total elements: 10, unique elements: 10, collisions: 1.0, collisions % 512: 1.0, keys min: 378, avg: 499, max: 558 205 | Avg time WyHash, 43.175: total elements: 10, unique elements: 10, collisions: 1.0, collisions % 512: 1.0, keys min: 378, avg: 499, max: 558 206 | Avg time Default, 22.654658385093168: total elements: 161, unique elements: 143, collisions: 1.0, collisions % 512: 1.125984251968504, keys min: 8, avg: 22, max: 43 207 | Avg time FxHasher, 20.537888198757763: total elements: 161, unique elements: 143, collisions: 1.0, collisions % 512: 1.1916666666666667, keys min: 8, avg: 22, max: 43 208 | Avg time AHasher, 17.930124223602483: total elements: 161, unique elements: 143, collisions: 1.0, collisions % 512: 1.1349206349206349, keys min: 8, avg: 22, max: 43 209 | Avg time WyHash, 19.022360248447203: total elements: 161, unique elements: 143, collisions: 1.0, collisions % 512: 1.1349206349206349, keys min: 8, avg: 22, max: 43 210 | ``` 211 | 212 | #### MacMini M1, 2020 213 | 214 | **Rust** 215 | 216 | ``` 217 | Avg time Default, 26.552: total elements: 100, unique elements: 82, collisions: 
1.0, collisions % 512: 1.0649350649350648, keys min: 2, avg: 6, max: 12 218 | Avg time FxHasher, 25.7875: total elements: 100, unique elements: 82, collisions: 1.0, collisions % 512: 1.0933333333333333, keys min: 2, avg: 6, max: 12 219 | Avg time AHasher, 26.688499999999998: total elements: 100, unique elements: 82, collisions: 1.0, collisions % 512: 1.1549295774647887, keys min: 2, avg: 6, max: 12 220 | Avg time WyHash, 27.168499999999998: total elements: 100, unique elements: 82, collisions: 1.0, collisions % 512: 1.0933333333333333, keys min: 2, avg: 6, max: 12 221 | Avg time Default, 30.68533533533534: total elements: 999, unique elements: 203, collisions: 1.0, collisions % 512: 1.1871345029239766, keys min: 1, avg: 5, max: 14 222 | Avg time FxHasher, 32.62207207207207: total elements: 999, unique elements: 203, collisions: 1.0, collisions % 512: 1.2083333333333333, keys min: 1, avg: 5, max: 14 223 | Avg time AHasher, 30.133333333333333: total elements: 999, unique elements: 203, collisions: 1.0, collisions % 512: 1.26875, keys min: 1, avg: 5, max: 14 224 | Avg time WyHash, 30.666916916916918: total elements: 999, unique elements: 203, collisions: 1.0, collisions % 512: 1.2083333333333333, keys min: 1, avg: 5, max: 14 225 | Avg time Default, 28.71331331331331: total elements: 999, unique elements: 192, collisions: 1.0, collisions % 512: 1.238709677419355, keys min: 1, avg: 5, max: 13 226 | Avg time FxHasher, 25.97787787787788: total elements: 999, unique elements: 192, collisions: 1.0, collisions % 512: 1.263157894736842, keys min: 1, avg: 5, max: 13 227 | Avg time AHasher, 26.035535535535537: total elements: 999, unique elements: 192, collisions: 1.0, collisions % 512: 1.238709677419355, keys min: 1, avg: 5, max: 13 228 | Avg time WyHash, 26.91166166166166: total elements: 999, unique elements: 192, collisions: 1.0, collisions % 512: 1.2075471698113207, keys min: 1, avg: 5, max: 13 229 | Avg time Default, 24.716066066066066: total elements: 999, unique 
elements: 532, collisions: 1.0, collisions % 512: 1.5647058823529412, keys min: 2, avg: 12, max: 37 230 | Avg time FxHasher, 23.58993993993994: total elements: 999, unique elements: 532, collisions: 1.0, collisions % 512: 1.6419753086419753, keys min: 2, avg: 12, max: 37 231 | Avg time AHasher, 23.47817817817818: total elements: 999, unique elements: 532, collisions: 1.0, collisions % 512: 1.6269113149847094, keys min: 2, avg: 12, max: 37 232 | Avg time WyHash, 21.5007007007007: total elements: 999, unique elements: 532, collisions: 1.0, collisions % 512: 1.636923076923077, keys min: 2, avg: 12, max: 37 233 | Avg time Default, 21.82362362362362: total elements: 999, unique elements: 208, collisions: 1.0, collisions % 512: 1.2606060606060605, keys min: 2, avg: 6, max: 18 234 | Avg time FxHasher, 21.375575575575578: total elements: 999, unique elements: 208, collisions: 1.0, collisions % 512: 1.2606060606060605, keys min: 2, avg: 6, max: 18 235 | Avg time AHasher, 20.11911911911912: total elements: 999, unique elements: 208, collisions: 1.0, collisions % 512: 1.2455089820359282, keys min: 2, avg: 6, max: 18 236 | Avg time WyHash, 20.31836836836837: total elements: 999, unique elements: 208, collisions: 1.0, collisions % 512: 1.1751412429378532, keys min: 2, avg: 6, max: 18 237 | Avg time Default, 181.005: total elements: 10, unique elements: 10, collisions: 1.0, collisions % 512: 1.0, keys min: 378, avg: 499, max: 558 238 | Avg time FxHasher, 122.93499999999999: total elements: 10, unique elements: 10, collisions: 1.0, collisions % 512: 1.0, keys min: 378, avg: 499, max: 558 239 | Avg time AHasher, 45.04: total elements: 10, unique elements: 10, collisions: 1.0, collisions % 512: 1.0, keys min: 378, avg: 499, max: 558 240 | Avg time WyHash, 31.005000000000003: total elements: 10, unique elements: 10, collisions: 1.0, collisions % 512: 1.0, keys min: 378, avg: 499, max: 558 241 | Avg time Default, 21.469254658385093: total elements: 161, unique elements: 143, 
collisions: 1.0, collisions % 512: 1.125984251968504, keys min: 8, avg: 22, max: 43 242 | Avg time FxHasher, 20.225155279503106: total elements: 161, unique elements: 143, collisions: 1.0, collisions % 512: 1.1916666666666667, keys min: 8, avg: 22, max: 43 243 | Avg time AHasher, 20.106832298136645: total elements: 161, unique elements: 143, collisions: 1.0, collisions % 512: 1.125984251968504, keys min: 8, avg: 22, max: 43 244 | Avg time WyHash, 19.890993788819873: total elements: 161, unique elements: 143, collisions: 1.0, collisions % 512: 1.1349206349206349, keys min: 8, avg: 22, max: 43 245 | ``` 246 | 247 | **Swift** 248 | 249 | ``` 250 | Avg time: 85.945, total elements: 100, unique elements: 82, collisions: 1.0, collisions % 512: 1.1388888, keys min: 2, avg: 6, max: 12 251 | Avg time: 67.80245, total elements: 999, unique elements: 203, collisions: 1.0, collisions % 512: 1.2083334, keys min: 1, avg: 5, max: 14 252 | Avg time: 65.73403, total elements: 999, unique elements: 192, collisions: 1.0, collisions % 512: 1.2, keys min: 1, avg: 5, max: 13 253 | Avg time: 240.52744, total elements: 999, unique elements: 532, collisions: 1.0, collisions % 512: 1.4224598, keys min: 1, avg: 6, max: 19 254 | Avg time: 79.92162, total elements: 999, unique elements: 208, collisions: 1.0, collisions % 512: 1.2163743, keys min: 1, avg: 6, max: 18 255 | Avg time: 1773.4, total elements: 10, unique elements: 10, collisions: 1.0, collisions % 512: 1.1111112, keys min: 130, avg: 171, max: 192 256 | Avg time: 140.84721, total elements: 161, unique elements: 143, collisions: 1.0, collisions % 512: 1.0592593, keys min: 8, avg: 22, max: 43 257 | ``` 258 | 259 | **Python** 260 | 261 | ``` 262 | Avg time: 76.5, total elements: 100, unique elements: 82, collisions: 1.0, collisions % 512: 1.1232876712328768, keys min: 2, avg: 6.012195121951219, max: 12 263 | Avg time: 71.52152152152152, total elements: 999, unique elements: 203, collisions: 1.0, collisions % 512: 1.215568862275449, keys 
min: 1, avg: 5.862068965517241, max: 14 264 | Avg time: 81.63163163163163, total elements: 999, unique elements: 192, collisions: 1.0, collisions % 512: 1.2, keys min: 1, avg: 5.385416666666667, max: 13 265 | Avg time: 79.72972972972973, total elements: 999, unique elements: 532, collisions: 1.0, collisions % 512: 1.592814371257485, keys min: 1, avg: 6.593984962406015, max: 19 266 | Avg time: 78.62862862862863, total elements: 999, unique elements: 208, collisions: 1.0, collisions % 512: 1.2023121387283238, keys min: 1, avg: 6.394230769230769, max: 18 267 | Avg time: 70.0, total elements: 10, unique elements: 10, collisions: 1.0, collisions % 512: 1.0, keys min: 130, avg: 171.4, max: 192 268 | Avg time: 75.77639751552795, total elements: 161, unique elements: 143, collisions: 1.0, collisions % 512: 1.125984251968504, keys min: 8, avg: 22.6013986013986, max: 43 269 | ``` 270 | 271 | **NodeJS** 272 | 273 | ``` 274 | Avg time WyHash: 5024.798, total elements: 100, unique elements: 82, collisions: 1, collisions % 512: 1.0512820512820513, keys min: 2, avg: 6.012195121951219, max: 12 275 | Avg time xxHash: 6870.5885, total elements: 100, unique elements: 82, collisions: 1, collisions % 512: 1.0933333333333333, keys min: 2, avg: 6.012195121951219, max: 12 276 | Avg time WyHash: 3688.6434934934937, total elements: 999, unique elements: 203, collisions: 1, collisions % 512: 1.26875, keys min: 1, avg: 5.862068965517241, max: 14 277 | Avg time xxHash: 4461.4131131131135, total elements: 999, unique elements: 203, collisions: 1, collisions % 512: 1.180232558139535, keys min: 1, avg: 5.862068965517241, max: 14 278 | Avg time WyHash: 3393.779079079079, total elements: 999, unique elements: 192, collisions: 1, collisions % 512: 1.1566265060240963, keys min: 1, avg: 5.385416666666667, max: 13 279 | Avg time xxHash: 4050.333833833834, total elements: 999, unique elements: 192, collisions: 1, collisions % 512: 1.1497005988023952, keys min: 1, avg: 5.385416666666667, max: 13 280 | 
Avg time WyHash: 4635.906556556557, total elements: 999, unique elements: 532, collisions: 1, collisions % 512: 1.6269113149847094, keys min: 1, avg: 6.593984962406015, max: 19 281 | Avg time xxHash: 5929.1773773773775, total elements: 999, unique elements: 532, collisions: 1, collisions % 512: 1.5880597014925373, keys min: 1, avg: 6.593984962406015, max: 19 282 | Avg time WyHash: 3601.807957957958, total elements: 999, unique elements: 208, collisions: 1, collisions % 512: 1.2093023255813953, keys min: 1, avg: 6.394230769230769, max: 18 283 | Avg time xxHash: 4370.727527527527, total elements: 999, unique elements: 208, collisions: 1, collisions % 512: 1.2682926829268293, keys min: 1, avg: 6.394230769230769, max: 18 284 | Avg time WyHash: 94997.88, total elements: 10, unique elements: 10, collisions: 1, collisions % 512: 1, keys min: 130, avg: 171.4, max: 192 285 | Avg time xxHash: 48261.055, total elements: 10, unique elements: 10, collisions: 1, collisions % 512: 1, keys min: 130, avg: 171.4, max: 192 286 | Avg time WyHash: 6467.241304347826, total elements: 161, unique elements: 143, collisions: 1, collisions % 512: 1.0916030534351144, keys min: 8, avg: 22.6013986013986, max: 43 287 | Avg time xxHash: 5186.903105590062, total elements: 161, unique elements: 143, collisions: 1, collisions % 512: 1.1349206349206349, keys min: 8, avg: 22.6013986013986, max: 43 288 | ``` 289 | 290 | **Go** 291 | 292 | ``` 293 | Avg time: 56.000000, total elements: 100, unique elements: 82, collisions: 1.000000, collisions mod 512: 1.108108, keys min: 2, avg: 6, max: 12 294 | Avg time: 53.353353, total elements: 999, unique elements: 203, collisions: 1.000000, collisions mod 512: 1.230303, keys min: 1, avg: 5, max: 14 295 | Avg time: 63.363363, total elements: 999, unique elements: 192, collisions: 1.000000, collisions mod 512: 1.215190, keys min: 1, avg: 5, max: 13 296 | Avg time: 57.507508, total elements: 999, unique elements: 532, collisions: 1.000000, collisions mod 512: 
1.588060, keys min: 2, avg: 12, max: 37 297 | Avg time: 56.156156, total elements: 999, unique elements: 208, collisions: 1.000000, collisions mod 512: 1.238095, keys min: 2, avg: 6, max: 18 298 | Avg time: 115.000000, total elements: 10, unique elements: 10, collisions: 1.000000, collisions mod 512: 1.111111, keys min: 378, avg: 499, max: 558 299 | Avg time: 59.316770, total elements: 161, unique elements: 143, collisions: 1.000000, collisions mod 512: 1.108527, keys min: 8, avg: 22, max: 43 300 | ``` 301 | 302 | ## Benchmark HashMap 303 | 304 | This repository also contains a simple HashMap implementation, which allows keys of type String and values that conform to the CollectionElement trait. 305 | 306 | ### Results 307 | 308 | CPU Specs: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz 309 | Tested with corpus 7, which is a list of S3 actions (total count 161, unique count 143) 310 | 311 | ``` 312 | AHash Avg put time 211.01180124223603 313 | AHash Avg get time 82.304968944099386 314 | WyHash Avg put time 206.67639751552795 315 | WyHash Avg get time 81.214285714285708 316 | FxHash64 Avg put time 223.24844720496895 317 | FxHash64 Avg get time 84.171428571428578 318 | StdHash Avg put time 634.18819875776398 319 | StdHash Avg get time 278.51801242236024 320 | ``` 321 | 322 | MacMini M1, 2020 323 | 324 | ``` 325 | AHash Avg put time 347.82608695652175 326 | AHash Avg get time 162.11180124223603 327 | WyHash Avg put time 363.35403726708074 328 | WyHash Avg get time 192.54658385093168 329 | FxHash64 Avg put time 418.63354037267078 330 | FxHash64 Avg get time 170.80745341614906 331 | StdHash Avg put time 583.22981366459624 332 | StdHash Avg get time 226.08695652173913 333 | ``` 334 | 335 | ![Hash map benchmark chart](images/hash_map.png) -------------------------------------------------------------------------------- /benchmark_other_languages/python/hash_functions/benchmark_dict.py: -------------------------------------------------------------------------------- 1 |
from pathlib import Path 2 | from time import time_ns 3 | 4 | def main(): 5 | corpus1 = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quisque orci urna, pretium et porta ac, porttitor sit amet sem. Fusce sagittis lorem neque, vitae sollicitudin elit suscipit et. In interdum convallis nisl in ornare. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Aliquam erat volutpat. Morbi mollis iaculis lectus ac tincidunt. Fusce nisi lacus, semper eu dignissim et, malesuada non mi. Sed euismod urna vel elit faucibus, eu bibendum ante fringilla. Curabitur tempus in turpis at mattis. Aliquam erat volutpat. Donec maximus elementum felis, sit amet dignissim augue tincidunt blandit. Aliquam fermentum, est eu mollis.".split(" ") 6 | corpus2 = "But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure? 
On the other hand, we denounce with righteous indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee the pain and trouble that are bound to ensue; and equal blame belongs to those who fail in their duty through weakness of will, which is the same as saying through shrinking from toil and pain. These cases are perfectly simple and easy to distinguish. In a free hour, when our power of choice is untrammelled and when nothing prevents our being able to do what we like best, every pleasure is to be welcomed and every pain avoided. But in certain circumstances and owing to the claims of duty or the obligations of business it will frequently occur that pleasures have to be repudiated and annoyances accepted. The wise man therefore always holds in these matters to this principle of selection: he rejects pleasures to secure other greater pleasures, or else he endures pains to avoid worse pains. But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure? 
On the other hand, we denounce with righteous indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee the pain and trouble that are bound to ensue; and equal blame belongs to those who fail in their duty through weakness of will, which is the same as saying through shrinking from toil and pain. These cases are perfectly simple and easy to distinguish. In a free hour, when our power of choice is untrammelled and when nothing prevents our being able to do what we like best, every pleasure is to be welcomed and every pain avoided. But in certain circumstances and owing to the claims of duty or the obligations of business it will frequently occur that pleasures have to be repudiated and annoyances accepted. The wise man therefore always holds in these matters to this principle of selection: he rejects pleasures to secure other greater pleasures, or else he endures pains to avoid worse pains.But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure? 
On the other hand, we denounce with righteous indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee the pain and trouble that are bound to ensue; and equal blame belongs to those who fail in their duty through weakness of will, which is the same as saying through shrinking from toil and pain. These cases are perfectly simple and easy to distinguish. In a free hour, when our power of choice is untrammelled and when nothing prevents our being able to do what we like best, every pleasure is to be welcomed and every pain avoided. But in certain circumstances and owing to the claims of duty or the obligations of business it will frequently occur that pleasures have to be repudiated and annoyances accepted. The wise man therefore always holds in these matters to this principle of selection:".split(" ") 7 | corpus3 = "A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls like mine. I am so happy, my dear friend, so absorbed in the exquisite sense of mere tranquil existence, that I neglect my talents. I should be incapable of drawing a single stroke at the present moment; and yet I feel that I never was a greater artist than now. 
When, while the lovely valley teems with vapour around me, and the meridian sun strikes the upper surface of the impenetrable foliage of my trees, and but a few stray gleams steal into the inner sanctuary, I throw myself down among the tall grass by the trickling stream; and, as I lie close to the earth, a thousand unknown plants are noticed by me: when I hear the buzz of the little world among the stalks, and grow familiar with the countless indescribable forms of the insects and flies, then I feel the presence of the Almighty, who formed us in his own image, and the breath of that universal love which bears and sustains us, as it floats around us in an eternity of bliss; and then, my friend, when darkness overspreads my eyes, and heaven and earth seem to dwell in my soul and absorb its power, like the form of a beloved mistress, then I often think with longing, Oh, would I could describe these conceptions, could impress upon paper all that is living so full and warm within me, that it might be the mirror of my soul, as my soul is the mirror of the infinite God! O my friend -- but it is too much for my strength -- I sink under the weight of the splendour of these visions! A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls like mine. I am so happy, my dear friend, so absorbed in the exquisite sense of mere tranquil existence, that I neglect my talents. I should be incapable of drawing a single stroke at the present moment; and yet I feel that I never was a greater artist than now. 
When, while the lovely valley teems with vapour around me, and the meridian sun strikes the upper surface of the impenetrable foliage of my trees, and but a few stray gleams steal into the inner sanctuary, I throw myself down among the tall grass by the trickling stream; and, as I lie close to the earth, a thousand unknown plants are noticed by me: when I hear the buzz of the little world among the stalks, and grow familiar with the countless indescribable forms of the insects and flies, then I feel the presence of the Almighty, who formed us in his own image, and the breath of that universal love which bears and sustains us, as it floats around us in an eternity of bliss; and then, my friend, when darkness overspreads my eyes, and heaven and earth seem to dwell in my soul and absorb its power, like the form of a beloved mistress, then I often think with longing, Oh, would I could describe these conceptions, could impress upon paper all that is living so full and warm within me, that it might be the mirror of my soul, as my soul is the mirror of the infinite God! O my friend -- but it is too much for my strength -- I sink under the weight of the splendour of these visions! A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls like mine. I am so happy, my dear friend, so absorbed in the exquisite sense of mere tranquil existence, that I neglect my talents. I should be incapable of drawing a single stroke at the present moment; and yet I feel that I never was a greater artist than now. 
When, while the lovely valley teems with vapour around me, and the meridian sun strikes the upper surface of the impenetrable foliage of my trees, and but a few stray gleams steal into the inner sanctuary, I throw myself down among the tall grass by the trickling stream; and, as I lie close to the earth, a thousand unknown plants are noticed by me: when I hear the buzz of the little world among the stalks, and grow familiar with the countless indescribable forms of the insects and flies, then I feel the presence of the Almighty, who formed us in his own image, and the breath of that universal love which bears and sustains us, as it floats around us in an eternity of bliss; and then, my friend, when darkness overspreads my eyes, and heaven and earth seem to dwell in my soul and absorb its power, like the form of a beloved mistress, then I often think with longing, Oh, would I could describe these conceptions, could impress upon paper all that is living so full and warm within me, that it might be the mirror of my soul, as my soul is the mirror of the infinite God! O my friend -- but it is too much for my strength -- I sink under the weight of the splendour of these visions!A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls".split(" ") 8 | corpus4 = "Проснувшись однажды утром после беспокойного сна, Грегор Замза обнаружил, что он у себя в постели превратился в страшное насекомое. Лежа на панцирнотвердой спине, он видел, стоило ему приподнять голову, свой коричневый, выпуклый, разделенный дугообразными чешуйками живот, на верхушке которого еле держалось готовое вот-вот окончательно сползти одеяло. Его многочисленные, убого тонкие по сравнению с остальным телом ножки беспомощно копошились у него перед глазами. «Что со мной случилось?» – подумал он. Это не было сном. 
Его комната, настоящая, разве что слишком маленькая, но обычная комната, мирно покоилась в своих четырех хорошо знакомых стенах. Над столом, где были разложены распакованные образцы сукон – Замза был коммивояжером, – висел портрет, который он недавно вырезал из иллюстрированного журнала и вставил в красивую золоченую рамку. На портрете была изображена дама в меховой шляпе и боа, она сидела очень прямо и протягивала зрителю тяжелую меховую муфту, в которой целиком исчезала ее рука. Затем взгляд Грегора устремился в окно, и пасмурная погода – слышно было, как по жести подоконника стучат капли дождя – привела его и вовсе в грустное настроение. «Хорошо бы еще немного поспать и забыть всю эту чепуху», – подумал он, но это было совершенно неосуществимо, он привык спать на правом боку, а в теперешнем своем состоянии он никак не мог принять этого положения. С какой бы силой ни поворачивался он на правый бок, он неизменно сваливался опять на спину. Закрыв глаза, чтобы не видеть своих барахтающихся ног, он проделал это добрую сотню раз и отказался от этих попыток только тогда, когда почувствовал какую-то неведомую дотоле, тупую и слабую боль в боку. «Ах ты, господи, – подумал он, – какую я выбрал хлопотную профессию! Изо дня в день в разъездах. Деловых волнений куда больше, чем на месте, в торговом доме, а кроме того, изволь терпеть тяготы дороги, думай о расписании поездов, мирись с плохим, нерегулярным питанием, завязывай со все новыми и новыми людьми недолгие, никогда не бывающие сердечными отношения. Черт бы побрал все это!» Он почувствовал вверху живота легкий зуд; медленно подвинулся на спине к прутьям кровати, чтобы удобнее было поднять голову; нашел зудевшее место, сплошь покрытое, как оказалось, белыми непонятными точечками; хотел было ощупать это место одной из ножек, но сразу отдернул ее, ибо даже простое прикосновение вызвало у него, Грегора, озноб. Он соскользнул в прежнее свое положение. «От этого раннего вставания, – подумал он, – можно совсем обезуметь. 
Человек должен высыпаться. Другие коммивояжеры живут, как одалиски. Когда я, например, среди дня возвращаюсь в гостиницу, чтобы переписать полученные заказы, эти господа только завтракают. А осмелься я вести себя так, мои хозяин выгнал бы меня сразу. Кто знает, впрочем, может быть, это было бы даже очень хорошо для меня. Если бы я не сдерживался ради родителей, я бы давно заявил об уходе, я бы подошел к своему хозяину и выложил ему все, что о нем думаю. Он бы так и свалился с конторки! Странная у него манера – садиться на конторку и с ее высоты разговаривать со служащим, который вдобавок вынужден подойти вплотную к конторке из-за того, что хозяин туг на ухо. Однако надежда еще не совсем потеряна: как только я накоплю денег, чтобы выплатить долг моих родителей – на это уйдет еще лет пять-шесть, – я так и поступлю. Тут-то мы и распрощаемся раз и навсегда. А пока что надо подниматься, мой поезд отходит в пять». И он взглянул на будильник, который тикал на сундуке. «Боже правый!» – подумал он. Было половина седьмого, и стрелки спокойно двигались дальше, было даже больше половины, без малого уже три четверти. Неужели будильник не звонил? С кровати было видно, что он поставлен правильно, на четыре часа; и он, несомненно, звонил. Но как можно было спокойно спать под этот сотрясающий мебель трезвон? Ну, спал-то он неспокойно, но, видимо, крепко. Однако что делать теперь? Следующий поезд уходит в семь часов; чтобы поспеть на него, он должен отчаянно торопиться, а набор образцов еще не упакован, да и сам он отнюдь не чувствует себя свежим и легким на подъем. И даже поспей он на поезд, хозяйского разноса ему все равно не избежать – ведь рассыльный торгового дома дежурил у пятичасового поезда и давно доложил о его, Грегора, опоздании. Рассыльный, человек бесхарактерный и неумный, был ставленником хозяина. А что, если сказаться больным? Но это было бы крайне неприятно и показалось бы подозрительным, ибо за пятилетнюю свою службу Грегор ни разу еще не болел. 
Хозяин, конечно, привел бы врача больничной кассы и стал попрекать родителей сыном-лентяем, отводя любые возражения ссылкой на этого врача, по мнению которого все люди на свете совершенно здоровы и только не любят работать. И разве в данном случае он был бы так уж неправ? Если не считать сонливости, действительно странной после такого долгого сна, Грегор и в самом деле чувствовал себя превосходно и был даже чертовски голоден.Проснувшись однажды утром после беспокойного сна, Грегор Замза обнаружил, что он у себя в постели превратился в страшное насекомое. Лежа на панцирнотвердой спине, он видел, стоило ему приподнять голову, свой коричневый, выпуклый, разделенный дугообразными чешуйками живот, на верхушке которого еле держалось готовое вот-вот окончательно сползти одеяло. Его многочисленные, убого тонкие по сравнению с остальным телом ножки беспомощно копошились у него перед глазами. «Что со мной случилось?» – подумал он. Это не было сном. Его комната, настоящая, разве что слишком маленькая, но обычная комната, мирно покоилась в своих четырех хорошо знакомых стенах. Над столом, где были разложены распакованные образцы сукон – Замза был коммивояжером, – висел портрет, который он недавно вырезал из иллюстрированного журнала и вставил в красивую золоченую рамку. На портрете была изображена дама в меховой шляпе и боа, она сидела очень прямо и протягивала зрителю тяжелую меховую муфту, в которой целиком исчезала ее рука. Затем взгляд Грегора устремился в окно, и пасмурная погода – слышно было, как по жести подоконника стучат капли дождя – привела его и вовсе в грустное настроение. «Хорошо бы еще немного поспать и забыть всю эту чепуху», – подумал он, но это было совершенно неосуществимо, он привык спать на правом боку, а в теперешнем своем состоянии он никак не мог принять этого положения. 
С какой бы силой ни поворачивался он на правый бок, он неизменно сваливался опять на спину.".split(" ") 9 | corpus5 = "Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen letzten Blick zurück auf die Skyline seiner Heimatstadt Buchstabhausen, die Headline von Alphabetdorf und die Subline seiner eigenen Straße, der Zeilengasse. Wehmütig lief ihm eine rhetorische Frage über die Wange, dann setzte es seinen Weg fort. Unterwegs traf es eine Copy. Die Copy warnte das Blindtextchen, da, wo sie herkäme wäre sie zigmal umgeschrieben worden und alles, was von ihrem Ursprung noch übrig wäre, sei das Wort \"und\" und das Blindtextchen solle umkehren und wieder in sein eigenes, sicheres Land zurückkehren. Doch alles Gutzureden konnte es nicht überzeugen und so dauerte es nicht lange, bis ihm ein paar heimtückische Werbetexter auflauerten, es mit Longe und Parole betrunken machten und es dann in ihre Agentur schleppten, wo sie es für ihre Projekte wieder und wieder mißbrauchten. Und wenn es nicht umgeschrieben wurde, dann benutzen Sie es immernoch. 
Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen letzten Blick zurück auf die Skyline seiner Heimatstadt Buchstabhausen, die Headline von Alphabetdorf und die Subline seiner eigenen Straße, der Zeilengasse. Wehmütig lief ihm eine rhetorische Frage über die Wange, dann setzte es seinen Weg fort. Unterwegs traf es eine Copy. Die Copy warnte das Blindtextchen, da, wo sie herkäme wäre sie zigmal umgeschrieben worden und alles, was von ihrem Ursprung noch übrig wäre, sei das Wort \"und\" und das Blindtextchen solle umkehren und wieder in sein eigenes, sicheres Land zurückkehren. Doch alles Gutzureden konnte es nicht überzeugen und so dauerte es nicht lange, bis ihm ein paar heimtückische Werbetexter auflauerten, es mit Longe und Parole betrunken machten und es dann in ihre Agentur schleppten, wo sie es für ihre Projekte wieder und wieder mißbrauchten. Und wenn es nicht umgeschrieben wurde, dann benutzen Sie es immernoch. Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. 
Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen letzten Blick zurück auf die Skyline seiner Heimatstadt Buchstabhausen, die Headline von Alphabetdorf und die Subline seiner eigenen Straße, der Zeilengasse. Wehmütig lief ihm eine rhetorische Frage über die Wange, dann setzte es seinen Weg fort. Unterwegs traf es eine Copy. Die Copy warnte das Blindtextchen, da, wo sie herkäme wäre sie zigmal umgeschrieben worden und alles, was von ihrem Ursprung noch übrig wäre, sei das Wort \"und\" und das Blindtextchen solle umkehren und wieder in sein eigenes, sicheres Land zurückkehren. Doch alles Gutzureden konnte es nicht überzeugen und so dauerte es nicht lange, bis ihm ein paar heimtückische Werbetexter auflauerten, es mit Longe und Parole betrunken machten und es dann in ihre Agentur schleppten, wo sie es für ihre Projekte wieder und wieder mißbrauchten. Und wenn es nicht umgeschrieben wurde, dann benutzen Sie es immernoch.Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. 
Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen".split(" ") 10 | corpus6 = "米くを舵4物委らご氏松ハナテフ月関ソ時平ふいの博情れじフ牟万い元56園フメヤオ試図ロツヤ未備王こと傷喫羅踊んゆし。栃ユヱオ書著作ユソツロ英祉業ア大課ご権質フべ空8午キ切軟づン著郎そゃす格町採ヱオマコ処8付国ムハチア究表でなだ際無ロミヱ地兵ぴげ庭体すク発抜爆位や。楽富むゆず盛航カナセ携代ハ本高きた員59今骸ンラえぜ城解イケ穴訴ぽぎ属住ヤケトヌ抱点ト広注厚でて。 国リ出難セユメ軍手ヘカウ画形サヲシ猛85用ヲキミ心死よしと身処ケヨミオ教主ーぽ事業んく字国たさょ図能シミスヤ社8板ル岡世58次戒知院んれり。市メ誘根カ数問禁竹ゃれえみ給辺のでみき今二ぎさ裕止過こクすと無32郎所ラた生展ヌヘス成度慣葬勇厘ばてか。室ゃ下携疲ム色権がぽりっ銃週ノオ姫千テム健蔵い研手ッ放容ル告属め旅側26企サノヨ宅都福ぞ通待ちぴね種脳イど労希望義通むン。 罰しい続負せ著低たル異師ユハワ東添質コチ転集ルヤ雇聴約ヒ前統らた情厳ゆさでや真胸や有披暑棚豆ゆぼたけ。盛ワセロナ情競クるっわ講3音ずをせ少地めしぜょ手63明視れに判企ヒヌエソ求総58特本ね井比ユラキ禁頭馬るゅリす能率率かがさわ。葉サソ医郡ヱヘソ労帰ナケスミ救写ワヘ株審ネヒニミ安逮イ人画ラ涯車はラ極騒りなド件5級ンかふー劇41著ぱぐ凱討だ文世ぶづどま界善魅マ渓経競融れがや。 連ーぜらご模分ッ視外ばフく運発群ほぼづ育越一ほごクけ案募ヲイソ治会イせフ製君ぜた漢村1変リヒ構5際ツ御文ヲ臭入さドぼ代書ハケ引技ろみれ回観注倉徹ぱ。論ラづ海要サ情座ゃり齢宣ラモエ芸化エマホ覧催回ら戦69本外ト葬岳な政画か連針ぴリフず。約ル闘辺ぽ経2応掲ホサアラ塾小コラ画決クノオ上室レヌヱ勝逮ぜるえむ責豊チノ明意ひけ訟6碁草メタチエ財午召喝塊む。 決めでわ名金つけレわ続人県約ぽぼす尾腹ユサ戦載リシ護賀レモフツ重涯ニ治者むんっみ職更カタチレ提話2何ワ責東まけげふ能政ヌ供禁がびてわ提改倶れめ。読み担後ぽ安加ぎ論鹿ツ統最お気麻月つじもあ竜思いろめ判必満理トコ文連ムイウハ寄串ざほびー。文ゆこっ向27年メイ便能ノセヲ待1王スねたゆ伝派んね点過カト治読よにきべ使人スシ都言え阻8割べづえみ注引敷的岳犠眠どそ。 学用イだ医客開ロ供界もぞだ実隆モイヌ務坂ナコヲ権野ろづ初場ぱ低会づぱじ新倒コ化政レ止奮浸猪ッわえづ。形いやリ要帰ほまむだ業領スル必打さ島14巻リ集日ネヘホタ面幅ち写上そぴ円図ムタコモ報使イわざと会催ヤヲ康証をドぶレ盤岡ホハツ作29管しをめ公問懐蓄っさ。来ゆぼあぱ投秋シ語右ぐ身靖かば辛握捕家記ヘワ神岐囲づ毘観メテクツ政73夕罪57需93誌飲査仁さ。 
変レめ束球よんま会特ヱコ聞重だ史純ーどる件32浦レぴよゃ上強ネラリロ査従セユヤ専棋光レ作表ひぶ予正ぜーな誉確フス函6報円ス進治ね能営済否雄でわょ。42生型ば着続ア短実ぎおめび前環闘ラヤヲル診均っとにの声公トヱテマ整試椅情久妊舌頃ざとっく。品キチトテ阿国ラら受87世ヲフセリ川86個ーょぼげ危子ヘレカメ無会ぱかへ事通んかて電条ロツ徴商ぶぞそを居暑メ害広せもがり禁応レミヲ応響割壮憶はぱ。 千れンが織財メニ況界ネトレミ学豊フオホシ近月レたやご的罪ょな菱技ちる警栗エセ提89林危氷48参ア説森クキヒヱ薬社ホコエリ負和ルび紀下ケミイ掲歳特ごず扱底ク護木連ちクを各形ばすか。変ぱなれ町7融ヌ街準以タユヘム質裕ぶで遺語俊ぎずょ事金文キ写多山ーゆに歩帯すで会世クぜよ論写ヲ達71林危氷5間続ぎぜび高怠す。 係8青け応著ミ戦条ナヘネカ思79未ぎ算伊をゃ泉人ーづ需説っ畑鹿27軽ラソツ権2促千護ルロナカ開国ケ暴嶋ご池表だ。佐フナ訪麻はてせば勝効をあ医戦画とさわぴ者両すいあ並来んば載食ぴ件友頂業へえぞ魚祝ネラ聞率スコリケ始全ンこび夫出ドふ今布うぎふゅ実克即哉循やしんな。 暮す備54依紀てッん末刊と柔称むてス無府ケイ変壌をぱ汁連フマス海世ヌ中負知問ナヘケ純推ひ読着ヒ言若私軽れ。掲けフむ王本オコ線人をっさ必和断セソヲハ図芸ちかな防長りぶは投新意相ツ並5余セ職岳ぞ端古空援そ。森ヨエチ題5東っ自兄ち暴5近鹿横ト的京ハ安氷ナキ深際ぎ並節くスむの権工ほルせ京49効タムチ処三ぞぴラ済国ずっ文経ヘトミ水分準そが。".split(" ") 11 | corpus7 = "AbortMultipartUpload CompleteMultipartUpload CopyObject CreateBucket CreateMultipartUpload DeleteBucket DeleteBucketAnalyticsConfiguration DeleteBucketCors DeleteBucketEncryption DeleteBucketIntelligentTieringConfiguration DeleteBucketInventoryConfiguration DeleteBucketLifecycle DeleteBucketMetricsConfiguration DeleteBucketOwnershipControls DeleteBucketPolicy DeleteBucketReplication DeleteBucketTagging DeleteBucketWebsite DeleteObject DeleteObjects DeleteObjectTagging DeletePublicAccessBlock GetBucketAccelerateConfiguration GetBucketAcl GetBucketAnalyticsConfiguration GetBucketCors GetBucketEncryption GetBucketIntelligentTieringConfiguration GetBucketInventoryConfiguration GetBucketLifecycle GetBucketLifecycleConfiguration GetBucketLocation GetBucketLogging GetBucketMetricsConfiguration GetBucketNotification GetBucketNotificationConfiguration GetBucketOwnershipControls GetBucketPolicy GetBucketPolicyStatus GetBucketReplication GetBucketRequestPayment GetBucketTagging GetBucketVersioning GetBucketWebsite GetObject GetObjectAcl GetObjectAttributes GetObjectLegalHold GetObjectLockConfiguration GetObjectRetention GetObjectTagging GetObjectTorrent GetPublicAccessBlock HeadBucket HeadObject ListBucketAnalyticsConfigurations ListBucketIntelligentTieringConfigurations ListBucketInventoryConfigurations 
ListBucketMetricsConfigurations ListBuckets ListMultipartUploads ListObjects ListObjectsV2 ListObjectVersions ListParts PutBucketAccelerateConfiguration PutBucketAcl PutBucketAnalyticsConfiguration PutBucketCors PutBucketEncryption PutBucketIntelligentTieringConfiguration PutBucketInventoryConfiguration PutBucketLifecycle PutBucketLifecycleConfiguration PutBucketLogging PutBucketMetricsConfiguration PutBucketNotification PutBucketNotificationConfiguration PutBucketOwnershipControls PutBucketPolicy PutBucketReplication PutBucketRequestPayment PutBucketTagging PutBucketVersioning PutBucketWebsite PutObject PutObjectAcl PutObjectLegalHold PutObjectLockConfiguration PutObjectRetention PutObjectTagging PutPublicAccessBlock RestoreObject SelectObjectContent UploadPart UploadPartCopy WriteGetObjectResponse CreateAccessPoint CreateAccessPointForObjectLambda CreateBucket CreateJob CreateMultiRegionAccessPoint DeleteAccessPoint DeleteAccessPointForObjectLambda DeleteAccessPointPolicy DeleteAccessPointPolicyForObjectLambda DeleteBucket DeleteBucketLifecycleConfiguration DeleteBucketPolicy DeleteBucketReplication DeleteBucketTagging DeleteJobTagging DeleteMultiRegionAccessPoint DeletePublicAccessBlock DeleteStorageLensConfiguration DeleteStorageLensConfigurationTagging DescribeJob DescribeMultiRegionAccessPointOperation GetAccessPoint GetAccessPointConfigurationForObjectLambda GetAccessPointForObjectLambda GetAccessPointPolicy GetAccessPointPolicyForObjectLambda GetAccessPointPolicyStatus GetAccessPointPolicyStatusForObjectLambda GetBucket GetBucketLifecycleConfiguration GetBucketPolicy GetBucketReplication GetBucketTagging GetBucketVersioning GetJobTagging GetMultiRegionAccessPoint GetMultiRegionAccessPointPolicy GetMultiRegionAccessPointPolicyStatus GetMultiRegionAccessPointRoutes GetPublicAccessBlock GetStorageLensConfiguration GetStorageLensConfigurationTagging ListAccessPoints ListAccessPointsForObjectLambda ListJobs ListMultiRegionAccessPoints ListRegionalBuckets 
ListStorageLensConfigurations PutAccessPointConfigurationForObjectLambda PutAccessPointPolicy PutAccessPointPolicyForObjectLambda PutBucketLifecycleConfiguration PutBucketPolicy PutBucketReplication PutBucketTagging PutBucketVersioning PutJobTagging PutMultiRegionAccessPointPolicy PutPublicAccessBlock PutStorageLensConfiguration PutStorageLensConfigurationTagging SubmitMultiRegionAccessPointRoutes UpdateJobPriority UpdateJobStatus".split(" ") 12 | corpus8 = Path("/usr/share/dict/words").read_text().splitlines() 13 | all = [corpus1, corpus2, corpus3, corpus4, corpus5, corpus6, corpus7, corpus8] 14 | for j, corpus in enumerate(all): 15 | total_put = 0 16 | total_get = 0 17 | for _ in range(10): 18 | sum = 0 19 | d = {} 20 | 21 | for i, c in enumerate(corpus): 22 | tik = time_ns() 23 | d[c] = i 24 | tok = time_ns() 25 | total_put += tok - tik 26 | 27 | for c in corpus: 28 | tik = time_ns() 29 | a = d[c] 30 | tok = time_ns() 31 | total_get += tok - tik 32 | sum += a 33 | print(f"Corpus {j + 1}") 34 | print(f"Avg time put: {(total_put / 10.0) / len(corpus)}") 35 | print(f"Avg time get: {(total_get / 10.0) / len(corpus)}") 36 | print(sum) 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /benchmark_other_languages/python/hash_functions/benchmark.py: -------------------------------------------------------------------------------- 1 | from time import time_ns 2 | 3 | def benchmark(corpus: list[str]): 4 | total = 0 5 | for _ in range(20): 6 | k = set() 7 | v = set() 8 | v512 = set() 9 | for key in corpus: 10 | k.add(key) 11 | tik = time_ns() 12 | h = hash(key) 13 | tok = time_ns() 14 | total += (tok - tik) 15 | v.add(h) 16 | v512.add(h % 512) 17 | min = 10000000 18 | max = 0 19 | sum = 0 20 | for key in k: 21 | l = len(key) 22 | sum += l 23 | if l < min: 24 | min = l 25 | if l > max: 26 | max = l 27 | avg = sum / len(k) 28 | 29 | print(f"Avg time: {(total / 20.0) / len(corpus)}, total 
elements: {len(corpus)}, unique elements: {len(k)}, collisions: {len(k) / len(v)}, collisions % 512: {len(k) / len(v512)}, keys min: {min}, avg: {avg}, max: {max}") 30 | 31 | 32 | def main(): 33 | corpus1 = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quisque orci urna, pretium et porta ac, porttitor sit amet sem. Fusce sagittis lorem neque, vitae sollicitudin elit suscipit et. In interdum convallis nisl in ornare. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Aliquam erat volutpat. Morbi mollis iaculis lectus ac tincidunt. Fusce nisi lacus, semper eu dignissim et, malesuada non mi. Sed euismod urna vel elit faucibus, eu bibendum ante fringilla. Curabitur tempus in turpis at mattis. Aliquam erat volutpat. Donec maximus elementum felis, sit amet dignissim augue tincidunt blandit. Aliquam fermentum, est eu mollis.".split(" ") 34 | corpus2 = "But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure? 
On the other hand, we denounce with righteous indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee the pain and trouble that are bound to ensue; and equal blame belongs to those who fail in their duty through weakness of will, which is the same as saying through shrinking from toil and pain. These cases are perfectly simple and easy to distinguish. In a free hour, when our power of choice is untrammelled and when nothing prevents our being able to do what we like best, every pleasure is to be welcomed and every pain avoided. But in certain circumstances and owing to the claims of duty or the obligations of business it will frequently occur that pleasures have to be repudiated and annoyances accepted. The wise man therefore always holds in these matters to this principle of selection: he rejects pleasures to secure other greater pleasures, or else he endures pains to avoid worse pains. But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure? 
On the other hand, we denounce with righteous indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee the pain and trouble that are bound to ensue; and equal blame belongs to those who fail in their duty through weakness of will, which is the same as saying through shrinking from toil and pain. These cases are perfectly simple and easy to distinguish. In a free hour, when our power of choice is untrammelled and when nothing prevents our being able to do what we like best, every pleasure is to be welcomed and every pain avoided. But in certain circumstances and owing to the claims of duty or the obligations of business it will frequently occur that pleasures have to be repudiated and annoyances accepted. The wise man therefore always holds in these matters to this principle of selection: he rejects pleasures to secure other greater pleasures, or else he endures pains to avoid worse pains.But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure? 
On the other hand, we denounce with righteous indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee the pain and trouble that are bound to ensue; and equal blame belongs to those who fail in their duty through weakness of will, which is the same as saying through shrinking from toil and pain. These cases are perfectly simple and easy to distinguish. In a free hour, when our power of choice is untrammelled and when nothing prevents our being able to do what we like best, every pleasure is to be welcomed and every pain avoided. But in certain circumstances and owing to the claims of duty or the obligations of business it will frequently occur that pleasures have to be repudiated and annoyances accepted. The wise man therefore always holds in these matters to this principle of selection:".split(" ") 35 | corpus3 = "A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls like mine. I am so happy, my dear friend, so absorbed in the exquisite sense of mere tranquil existence, that I neglect my talents. I should be incapable of drawing a single stroke at the present moment; and yet I feel that I never was a greater artist than now. 
When, while the lovely valley teems with vapour around me, and the meridian sun strikes the upper surface of the impenetrable foliage of my trees, and but a few stray gleams steal into the inner sanctuary, I throw myself down among the tall grass by the trickling stream; and, as I lie close to the earth, a thousand unknown plants are noticed by me: when I hear the buzz of the little world among the stalks, and grow familiar with the countless indescribable forms of the insects and flies, then I feel the presence of the Almighty, who formed us in his own image, and the breath of that universal love which bears and sustains us, as it floats around us in an eternity of bliss; and then, my friend, when darkness overspreads my eyes, and heaven and earth seem to dwell in my soul and absorb its power, like the form of a beloved mistress, then I often think with longing, Oh, would I could describe these conceptions, could impress upon paper all that is living so full and warm within me, that it might be the mirror of my soul, as my soul is the mirror of the infinite God! O my friend -- but it is too much for my strength -- I sink under the weight of the splendour of these visions! A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls like mine. I am so happy, my dear friend, so absorbed in the exquisite sense of mere tranquil existence, that I neglect my talents. I should be incapable of drawing a single stroke at the present moment; and yet I feel that I never was a greater artist than now. 
When, while the lovely valley teems with vapour around me, and the meridian sun strikes the upper surface of the impenetrable foliage of my trees, and but a few stray gleams steal into the inner sanctuary, I throw myself down among the tall grass by the trickling stream; and, as I lie close to the earth, a thousand unknown plants are noticed by me: when I hear the buzz of the little world among the stalks, and grow familiar with the countless indescribable forms of the insects and flies, then I feel the presence of the Almighty, who formed us in his own image, and the breath of that universal love which bears and sustains us, as it floats around us in an eternity of bliss; and then, my friend, when darkness overspreads my eyes, and heaven and earth seem to dwell in my soul and absorb its power, like the form of a beloved mistress, then I often think with longing, Oh, would I could describe these conceptions, could impress upon paper all that is living so full and warm within me, that it might be the mirror of my soul, as my soul is the mirror of the infinite God! O my friend -- but it is too much for my strength -- I sink under the weight of the splendour of these visions! A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls like mine. I am so happy, my dear friend, so absorbed in the exquisite sense of mere tranquil existence, that I neglect my talents. I should be incapable of drawing a single stroke at the present moment; and yet I feel that I never was a greater artist than now. 
When, while the lovely valley teems with vapour around me, and the meridian sun strikes the upper surface of the impenetrable foliage of my trees, and but a few stray gleams steal into the inner sanctuary, I throw myself down among the tall grass by the trickling stream; and, as I lie close to the earth, a thousand unknown plants are noticed by me: when I hear the buzz of the little world among the stalks, and grow familiar with the countless indescribable forms of the insects and flies, then I feel the presence of the Almighty, who formed us in his own image, and the breath of that universal love which bears and sustains us, as it floats around us in an eternity of bliss; and then, my friend, when darkness overspreads my eyes, and heaven and earth seem to dwell in my soul and absorb its power, like the form of a beloved mistress, then I often think with longing, Oh, would I could describe these conceptions, could impress upon paper all that is living so full and warm within me, that it might be the mirror of my soul, as my soul is the mirror of the infinite God! O my friend -- but it is too much for my strength -- I sink under the weight of the splendour of these visions!A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls".split(" ") 36 | corpus4 = "Проснувшись однажды утром после беспокойного сна, Грегор Замза обнаружил, что он у себя в постели превратился в страшное насекомое. Лежа на панцирнотвердой спине, он видел, стоило ему приподнять голову, свой коричневый, выпуклый, разделенный дугообразными чешуйками живот, на верхушке которого еле держалось готовое вот-вот окончательно сползти одеяло. Его многочисленные, убого тонкие по сравнению с остальным телом ножки беспомощно копошились у него перед глазами. «Что со мной случилось?» – подумал он. Это не было сном. 
Его комната, настоящая, разве что слишком маленькая, но обычная комната, мирно покоилась в своих четырех хорошо знакомых стенах. Над столом, где были разложены распакованные образцы сукон – Замза был коммивояжером, – висел портрет, который он недавно вырезал из иллюстрированного журнала и вставил в красивую золоченую рамку. На портрете была изображена дама в меховой шляпе и боа, она сидела очень прямо и протягивала зрителю тяжелую меховую муфту, в которой целиком исчезала ее рука. Затем взгляд Грегора устремился в окно, и пасмурная погода – слышно было, как по жести подоконника стучат капли дождя – привела его и вовсе в грустное настроение. «Хорошо бы еще немного поспать и забыть всю эту чепуху», – подумал он, но это было совершенно неосуществимо, он привык спать на правом боку, а в теперешнем своем состоянии он никак не мог принять этого положения. С какой бы силой ни поворачивался он на правый бок, он неизменно сваливался опять на спину. Закрыв глаза, чтобы не видеть своих барахтающихся ног, он проделал это добрую сотню раз и отказался от этих попыток только тогда, когда почувствовал какую-то неведомую дотоле, тупую и слабую боль в боку. «Ах ты, господи, – подумал он, – какую я выбрал хлопотную профессию! Изо дня в день в разъездах. Деловых волнений куда больше, чем на месте, в торговом доме, а кроме того, изволь терпеть тяготы дороги, думай о расписании поездов, мирись с плохим, нерегулярным питанием, завязывай со все новыми и новыми людьми недолгие, никогда не бывающие сердечными отношения. Черт бы побрал все это!» Он почувствовал вверху живота легкий зуд; медленно подвинулся на спине к прутьям кровати, чтобы удобнее было поднять голову; нашел зудевшее место, сплошь покрытое, как оказалось, белыми непонятными точечками; хотел было ощупать это место одной из ножек, но сразу отдернул ее, ибо даже простое прикосновение вызвало у него, Грегора, озноб. Он соскользнул в прежнее свое положение. «От этого раннего вставания, – подумал он, – можно совсем обезуметь. 
Человек должен высыпаться. Другие коммивояжеры живут, как одалиски. Когда я, например, среди дня возвращаюсь в гостиницу, чтобы переписать полученные заказы, эти господа только завтракают. А осмелься я вести себя так, мои хозяин выгнал бы меня сразу. Кто знает, впрочем, может быть, это было бы даже очень хорошо для меня. Если бы я не сдерживался ради родителей, я бы давно заявил об уходе, я бы подошел к своему хозяину и выложил ему все, что о нем думаю. Он бы так и свалился с конторки! Странная у него манера – садиться на конторку и с ее высоты разговаривать со служащим, который вдобавок вынужден подойти вплотную к конторке из-за того, что хозяин туг на ухо. Однако надежда еще не совсем потеряна: как только я накоплю денег, чтобы выплатить долг моих родителей – на это уйдет еще лет пять-шесть, – я так и поступлю. Тут-то мы и распрощаемся раз и навсегда. А пока что надо подниматься, мой поезд отходит в пять». И он взглянул на будильник, который тикал на сундуке. «Боже правый!» – подумал он. Было половина седьмого, и стрелки спокойно двигались дальше, было даже больше половины, без малого уже три четверти. Неужели будильник не звонил? С кровати было видно, что он поставлен правильно, на четыре часа; и он, несомненно, звонил. Но как можно было спокойно спать под этот сотрясающий мебель трезвон? Ну, спал-то он неспокойно, но, видимо, крепко. Однако что делать теперь? Следующий поезд уходит в семь часов; чтобы поспеть на него, он должен отчаянно торопиться, а набор образцов еще не упакован, да и сам он отнюдь не чувствует себя свежим и легким на подъем. И даже поспей он на поезд, хозяйского разноса ему все равно не избежать – ведь рассыльный торгового дома дежурил у пятичасового поезда и давно доложил о его, Грегора, опоздании. Рассыльный, человек бесхарактерный и неумный, был ставленником хозяина. А что, если сказаться больным? Но это было бы крайне неприятно и показалось бы подозрительным, ибо за пятилетнюю свою службу Грегор ни разу еще не болел. 
Хозяин, конечно, привел бы врача больничной кассы и стал попрекать родителей сыном-лентяем, отводя любые возражения ссылкой на этого врача, по мнению которого все люди на свете совершенно здоровы и только не любят работать. И разве в данном случае он был бы так уж неправ? Если не считать сонливости, действительно странной после такого долгого сна, Грегор и в самом деле чувствовал себя превосходно и был даже чертовски голоден.Проснувшись однажды утром после беспокойного сна, Грегор Замза обнаружил, что он у себя в постели превратился в страшное насекомое. Лежа на панцирнотвердой спине, он видел, стоило ему приподнять голову, свой коричневый, выпуклый, разделенный дугообразными чешуйками живот, на верхушке которого еле держалось готовое вот-вот окончательно сползти одеяло. Его многочисленные, убого тонкие по сравнению с остальным телом ножки беспомощно копошились у него перед глазами. «Что со мной случилось?» – подумал он. Это не было сном. Его комната, настоящая, разве что слишком маленькая, но обычная комната, мирно покоилась в своих четырех хорошо знакомых стенах. Над столом, где были разложены распакованные образцы сукон – Замза был коммивояжером, – висел портрет, который он недавно вырезал из иллюстрированного журнала и вставил в красивую золоченую рамку. На портрете была изображена дама в меховой шляпе и боа, она сидела очень прямо и протягивала зрителю тяжелую меховую муфту, в которой целиком исчезала ее рука. Затем взгляд Грегора устремился в окно, и пасмурная погода – слышно было, как по жести подоконника стучат капли дождя – привела его и вовсе в грустное настроение. «Хорошо бы еще немного поспать и забыть всю эту чепуху», – подумал он, но это было совершенно неосуществимо, он привык спать на правом боку, а в теперешнем своем состоянии он никак не мог принять этого положения. 
С какой бы силой ни поворачивался он на правый бок, он неизменно сваливался опять на спину.".split(" ") 37 | corpus5 = "Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen letzten Blick zurück auf die Skyline seiner Heimatstadt Buchstabhausen, die Headline von Alphabetdorf und die Subline seiner eigenen Straße, der Zeilengasse. Wehmütig lief ihm eine rhetorische Frage über die Wange, dann setzte es seinen Weg fort. Unterwegs traf es eine Copy. Die Copy warnte das Blindtextchen, da, wo sie herkäme wäre sie zigmal umgeschrieben worden und alles, was von ihrem Ursprung noch übrig wäre, sei das Wort \"und\" und das Blindtextchen solle umkehren und wieder in sein eigenes, sicheres Land zurückkehren. Doch alles Gutzureden konnte es nicht überzeugen und so dauerte es nicht lange, bis ihm ein paar heimtückische Werbetexter auflauerten, es mit Longe und Parole betrunken machten und es dann in ihre Agentur schleppten, wo sie es für ihre Projekte wieder und wieder mißbrauchten. Und wenn es nicht umgeschrieben wurde, dann benutzen Sie es immernoch. 
Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen letzten Blick zurück auf die Skyline seiner Heimatstadt Buchstabhausen, die Headline von Alphabetdorf und die Subline seiner eigenen Straße, der Zeilengasse. Wehmütig lief ihm eine rhetorische Frage über die Wange, dann setzte es seinen Weg fort. Unterwegs traf es eine Copy. Die Copy warnte das Blindtextchen, da, wo sie herkäme wäre sie zigmal umgeschrieben worden und alles, was von ihrem Ursprung noch übrig wäre, sei das Wort \"und\" und das Blindtextchen solle umkehren und wieder in sein eigenes, sicheres Land zurückkehren. Doch alles Gutzureden konnte es nicht überzeugen und so dauerte es nicht lange, bis ihm ein paar heimtückische Werbetexter auflauerten, es mit Longe und Parole betrunken machten und es dann in ihre Agentur schleppten, wo sie es für ihre Projekte wieder und wieder mißbrauchten. Und wenn es nicht umgeschrieben wurde, dann benutzen Sie es immernoch. Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. 
Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen letzten Blick zurück auf die Skyline seiner Heimatstadt Buchstabhausen, die Headline von Alphabetdorf und die Subline seiner eigenen Straße, der Zeilengasse. Wehmütig lief ihm eine rhetorische Frage über die Wange, dann setzte es seinen Weg fort. Unterwegs traf es eine Copy. Die Copy warnte das Blindtextchen, da, wo sie herkäme wäre sie zigmal umgeschrieben worden und alles, was von ihrem Ursprung noch übrig wäre, sei das Wort \"und\" und das Blindtextchen solle umkehren und wieder in sein eigenes, sicheres Land zurückkehren. Doch alles Gutzureden konnte es nicht überzeugen und so dauerte es nicht lange, bis ihm ein paar heimtückische Werbetexter auflauerten, es mit Longe und Parole betrunken machten und es dann in ihre Agentur schleppten, wo sie es für ihre Projekte wieder und wieder mißbrauchten. Und wenn es nicht umgeschrieben wurde, dann benutzen Sie es immernoch.Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. 
Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen".split(" ") 38 | corpus6 = "米くを舵4物委らご氏松ハナテフ月関ソ時平ふいの博情れじフ牟万い元56園フメヤオ試図ロツヤ未備王こと傷喫羅踊んゆし。栃ユヱオ書著作ユソツロ英祉業ア大課ご権質フべ空8午キ切軟づン著郎そゃす格町採ヱオマコ処8付国ムハチア究表でなだ際無ロミヱ地兵ぴげ庭体すク発抜爆位や。楽富むゆず盛航カナセ携代ハ本高きた員59今骸ンラえぜ城解イケ穴訴ぽぎ属住ヤケトヌ抱点ト広注厚でて。 国リ出難セユメ軍手ヘカウ画形サヲシ猛85用ヲキミ心死よしと身処ケヨミオ教主ーぽ事業んく字国たさょ図能シミスヤ社8板ル岡世58次戒知院んれり。市メ誘根カ数問禁竹ゃれえみ給辺のでみき今二ぎさ裕止過こクすと無32郎所ラた生展ヌヘス成度慣葬勇厘ばてか。室ゃ下携疲ム色権がぽりっ銃週ノオ姫千テム健蔵い研手ッ放容ル告属め旅側26企サノヨ宅都福ぞ通待ちぴね種脳イど労希望義通むン。 罰しい続負せ著低たル異師ユハワ東添質コチ転集ルヤ雇聴約ヒ前統らた情厳ゆさでや真胸や有披暑棚豆ゆぼたけ。盛ワセロナ情競クるっわ講3音ずをせ少地めしぜょ手63明視れに判企ヒヌエソ求総58特本ね井比ユラキ禁頭馬るゅリす能率率かがさわ。葉サソ医郡ヱヘソ労帰ナケスミ救写ワヘ株審ネヒニミ安逮イ人画ラ涯車はラ極騒りなド件5級ンかふー劇41著ぱぐ凱討だ文世ぶづどま界善魅マ渓経競融れがや。 連ーぜらご模分ッ視外ばフく運発群ほぼづ育越一ほごクけ案募ヲイソ治会イせフ製君ぜた漢村1変リヒ構5際ツ御文ヲ臭入さドぼ代書ハケ引技ろみれ回観注倉徹ぱ。論ラづ海要サ情座ゃり齢宣ラモエ芸化エマホ覧催回ら戦69本外ト葬岳な政画か連針ぴリフず。約ル闘辺ぽ経2応掲ホサアラ塾小コラ画決クノオ上室レヌヱ勝逮ぜるえむ責豊チノ明意ひけ訟6碁草メタチエ財午召喝塊む。 決めでわ名金つけレわ続人県約ぽぼす尾腹ユサ戦載リシ護賀レモフツ重涯ニ治者むんっみ職更カタチレ提話2何ワ責東まけげふ能政ヌ供禁がびてわ提改倶れめ。読み担後ぽ安加ぎ論鹿ツ統最お気麻月つじもあ竜思いろめ判必満理トコ文連ムイウハ寄串ざほびー。文ゆこっ向27年メイ便能ノセヲ待1王スねたゆ伝派んね点過カト治読よにきべ使人スシ都言え阻8割べづえみ注引敷的岳犠眠どそ。 学用イだ医客開ロ供界もぞだ実隆モイヌ務坂ナコヲ権野ろづ初場ぱ低会づぱじ新倒コ化政レ止奮浸猪ッわえづ。形いやリ要帰ほまむだ業領スル必打さ島14巻リ集日ネヘホタ面幅ち写上そぴ円図ムタコモ報使イわざと会催ヤヲ康証をドぶレ盤岡ホハツ作29管しをめ公問懐蓄っさ。来ゆぼあぱ投秋シ語右ぐ身靖かば辛握捕家記ヘワ神岐囲づ毘観メテクツ政73夕罪57需93誌飲査仁さ。 
変レめ束球よんま会特ヱコ聞重だ史純ーどる件32浦レぴよゃ上強ネラリロ査従セユヤ専棋光レ作表ひぶ予正ぜーな誉確フス函6報円ス進治ね能営済否雄でわょ。42生型ば着続ア短実ぎおめび前環闘ラヤヲル診均っとにの声公トヱテマ整試椅情久妊舌頃ざとっく。品キチトテ阿国ラら受87世ヲフセリ川86個ーょぼげ危子ヘレカメ無会ぱかへ事通んかて電条ロツ徴商ぶぞそを居暑メ害広せもがり禁応レミヲ応響割壮憶はぱ。 千れンが織財メニ況界ネトレミ学豊フオホシ近月レたやご的罪ょな菱技ちる警栗エセ提89林危氷48参ア説森クキヒヱ薬社ホコエリ負和ルび紀下ケミイ掲歳特ごず扱底ク護木連ちクを各形ばすか。変ぱなれ町7融ヌ街準以タユヘム質裕ぶで遺語俊ぎずょ事金文キ写多山ーゆに歩帯すで会世クぜよ論写ヲ達71林危氷5間続ぎぜび高怠す。 係8青け応著ミ戦条ナヘネカ思79未ぎ算伊をゃ泉人ーづ需説っ畑鹿27軽ラソツ権2促千護ルロナカ開国ケ暴嶋ご池表だ。佐フナ訪麻はてせば勝効をあ医戦画とさわぴ者両すいあ並来んば載食ぴ件友頂業へえぞ魚祝ネラ聞率スコリケ始全ンこび夫出ドふ今布うぎふゅ実克即哉循やしんな。 暮す備54依紀てッん末刊と柔称むてス無府ケイ変壌をぱ汁連フマス海世ヌ中負知問ナヘケ純推ひ読着ヒ言若私軽れ。掲けフむ王本オコ線人をっさ必和断セソヲハ図芸ちかな防長りぶは投新意相ツ並5余セ職岳ぞ端古空援そ。森ヨエチ題5東っ自兄ち暴5近鹿横ト的京ハ安氷ナキ深際ぎ並節くスむの権工ほルせ京49効タムチ処三ぞぴラ済国ずっ文経ヘトミ水分準そが。".split(" ") 39 | corpus7 = "AbortMultipartUpload CompleteMultipartUpload CopyObject CreateBucket CreateMultipartUpload DeleteBucket DeleteBucketAnalyticsConfiguration DeleteBucketCors DeleteBucketEncryption DeleteBucketIntelligentTieringConfiguration DeleteBucketInventoryConfiguration DeleteBucketLifecycle DeleteBucketMetricsConfiguration DeleteBucketOwnershipControls DeleteBucketPolicy DeleteBucketReplication DeleteBucketTagging DeleteBucketWebsite DeleteObject DeleteObjects DeleteObjectTagging DeletePublicAccessBlock GetBucketAccelerateConfiguration GetBucketAcl GetBucketAnalyticsConfiguration GetBucketCors GetBucketEncryption GetBucketIntelligentTieringConfiguration GetBucketInventoryConfiguration GetBucketLifecycle GetBucketLifecycleConfiguration GetBucketLocation GetBucketLogging GetBucketMetricsConfiguration GetBucketNotification GetBucketNotificationConfiguration GetBucketOwnershipControls GetBucketPolicy GetBucketPolicyStatus GetBucketReplication GetBucketRequestPayment GetBucketTagging GetBucketVersioning GetBucketWebsite GetObject GetObjectAcl GetObjectAttributes GetObjectLegalHold GetObjectLockConfiguration GetObjectRetention GetObjectTagging GetObjectTorrent GetPublicAccessBlock HeadBucket HeadObject ListBucketAnalyticsConfigurations ListBucketIntelligentTieringConfigurations ListBucketInventoryConfigurations 
ListBucketMetricsConfigurations ListBuckets ListMultipartUploads ListObjects ListObjectsV2 ListObjectVersions ListParts PutBucketAccelerateConfiguration PutBucketAcl PutBucketAnalyticsConfiguration PutBucketCors PutBucketEncryption PutBucketIntelligentTieringConfiguration PutBucketInventoryConfiguration PutBucketLifecycle PutBucketLifecycleConfiguration PutBucketLogging PutBucketMetricsConfiguration PutBucketNotification PutBucketNotificationConfiguration PutBucketOwnershipControls PutBucketPolicy PutBucketReplication PutBucketRequestPayment PutBucketTagging PutBucketVersioning PutBucketWebsite PutObject PutObjectAcl PutObjectLegalHold PutObjectLockConfiguration PutObjectRetention PutObjectTagging PutPublicAccessBlock RestoreObject SelectObjectContent UploadPart UploadPartCopy WriteGetObjectResponse CreateAccessPoint CreateAccessPointForObjectLambda CreateBucket CreateJob CreateMultiRegionAccessPoint DeleteAccessPoint DeleteAccessPointForObjectLambda DeleteAccessPointPolicy DeleteAccessPointPolicyForObjectLambda DeleteBucket DeleteBucketLifecycleConfiguration DeleteBucketPolicy DeleteBucketReplication DeleteBucketTagging DeleteJobTagging DeleteMultiRegionAccessPoint DeletePublicAccessBlock DeleteStorageLensConfiguration DeleteStorageLensConfigurationTagging DescribeJob DescribeMultiRegionAccessPointOperation GetAccessPoint GetAccessPointConfigurationForObjectLambda GetAccessPointForObjectLambda GetAccessPointPolicy GetAccessPointPolicyForObjectLambda GetAccessPointPolicyStatus GetAccessPointPolicyStatusForObjectLambda GetBucket GetBucketLifecycleConfiguration GetBucketPolicy GetBucketReplication GetBucketTagging GetBucketVersioning GetJobTagging GetMultiRegionAccessPoint GetMultiRegionAccessPointPolicy GetMultiRegionAccessPointPolicyStatus GetMultiRegionAccessPointRoutes GetPublicAccessBlock GetStorageLensConfiguration GetStorageLensConfigurationTagging ListAccessPoints ListAccessPointsForObjectLambda ListJobs ListMultiRegionAccessPoints ListRegionalBuckets 
ListStorageLensConfigurations PutAccessPointConfigurationForObjectLambda PutAccessPointPolicy PutAccessPointPolicyForObjectLambda PutBucketLifecycleConfiguration PutBucketPolicy PutBucketReplication PutBucketTagging PutBucketVersioning PutJobTagging PutMultiRegionAccessPointPolicy PutPublicAccessBlock PutStorageLensConfiguration PutStorageLensConfigurationTagging SubmitMultiRegionAccessPointRoutes UpdateJobPriority UpdateJobStatus".split(" ") 40 | corps = [corpus1, corpus2, corpus3, corpus4, corpus5, corpus6, corpus7] 41 | for corpus in corps: 42 | benchmark(corpus) 43 | 44 | 45 | if __name__ == "__main__": 46 | main() -------------------------------------------------------------------------------- /my_utils/__init__.mojo: -------------------------------------------------------------------------------- 1 | from memory import memcmp 2 | from pathlib import Path 3 | 4 | fn int_cmp(a: UInt32, b: UInt32) -> Int: 5 | return int(a) - int(b) 6 | 7 | fn int_cmp64(a: UInt64, b: UInt64) -> Int: 8 | return int(a) - int(b) 9 | 10 | fn int_to_str(a: UInt32) -> String: 11 | return str(a) 12 | 13 | fn int_to_str64(a: UInt64) -> String: 14 | return str(a) 15 | 16 | fn cmp_strl(a: StringLiteral, b: StringLiteral) -> Int: 17 | var l = min(len(a), len(b)) 18 | var p1 = a.unsafe_ptr() 19 | var p2 = b.unsafe_ptr() 20 | var diff = memcmp(p1, p2, l) 21 | 22 | return diff if diff != 0 else len(a) - len(b) 23 | 24 | fn cmp_str(a: String, b: String) -> Int: 25 | var l = min(len(a), len(b)) 26 | var p1 = a.unsafe_ptr() 27 | var p2 = b.unsafe_ptr() 28 | var diff = memcmp(p1, p2, l) 29 | 30 | return diff if diff != 0 else len(a) - len(b) 31 | 32 | fn stsl(a: StringLiteral) -> String: 33 | return a 34 | 35 | fn corpus1() raises -> List[String]: 36 | return String('Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quisque orci urna, pretium et porta ac, porttitor sit amet sem. Fusce sagittis lorem neque, vitae sollicitudin elit suscipit et. In interdum convallis nisl in ornare. 
Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Aliquam erat volutpat. Morbi mollis iaculis lectus ac tincidunt. Fusce nisi lacus, semper eu dignissim et, malesuada non mi. Sed euismod urna vel elit faucibus, eu bibendum ante fringilla. Curabitur tempus in turpis at mattis. Aliquam erat volutpat. Donec maximus elementum felis, sit amet dignissim augue tincidunt blandit. Aliquam fermentum, est eu mollis.').split(" ") 37 | 38 | fn corpus2() raises -> List[String]: 39 | return String('But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure? On the other hand, we denounce with righteous indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee the pain and trouble that are bound to ensue; and equal blame belongs to those who fail in their duty through weakness of will, which is the same as saying through shrinking from toil and pain. These cases are perfectly simple and easy to distinguish. 
In a free hour, when our power of choice is untrammelled and when nothing prevents our being able to do what we like best, every pleasure is to be welcomed and every pain avoided. But in certain circumstances and owing to the claims of duty or the obligations of business it will frequently occur that pleasures have to be repudiated and annoyances accepted. The wise man therefore always holds in these matters to this principle of selection: he rejects pleasures to secure other greater pleasures, or else he endures pains to avoid worse pains. But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure? On the other hand, we denounce with righteous indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee the pain and trouble that are bound to ensue; and equal blame belongs to those who fail in their duty through weakness of will, which is the same as saying through shrinking from toil and pain. These cases are perfectly simple and easy to distinguish. 
In a free hour, when our power of choice is untrammelled and when nothing prevents our being able to do what we like best, every pleasure is to be welcomed and every pain avoided. But in certain circumstances and owing to the claims of duty or the obligations of business it will frequently occur that pleasures have to be repudiated and annoyances accepted. The wise man therefore always holds in these matters to this principle of selection: he rejects pleasures to secure other greater pleasures, or else he endures pains to avoid worse pains.But I must explain to you how all this mistaken idea of denouncing pleasure and praising pain was born and I will give you a complete account of the system, and expound the actual teachings of the great explorer of the truth, the master-builder of human happiness. No one rejects, dislikes, or avoids pleasure itself, because it is pleasure, but because those who do not know how to pursue pleasure rationally encounter consequences that are extremely painful. Nor again is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but because occasionally circumstances occur in which toil and pain can procure him some great pleasure. To take a trivial example, which of us ever undertakes laborious physical exercise, except to obtain some advantage from it? But who has any right to find fault with a man who chooses to enjoy a pleasure that has no annoying consequences, or one who avoids a pain that produces no resultant pleasure? On the other hand, we denounce with righteous indignation and dislike men who are so beguiled and demoralized by the charms of pleasure of the moment, so blinded by desire, that they cannot foresee the pain and trouble that are bound to ensue; and equal blame belongs to those who fail in their duty through weakness of will, which is the same as saying through shrinking from toil and pain. These cases are perfectly simple and easy to distinguish. 
In a free hour, when our power of choice is untrammelled and when nothing prevents our being able to do what we like best, every pleasure is to be welcomed and every pain avoided. But in certain circumstances and owing to the claims of duty or the obligations of business it will frequently occur that pleasures have to be repudiated and annoyances accepted. The wise man therefore always holds in these matters to this principle of selection:').split(" ") 40 | 41 | fn corpus3() raises -> List[String]: 42 | return String('A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls like mine. I am so happy, my dear friend, so absorbed in the exquisite sense of mere tranquil existence, that I neglect my talents. I should be incapable of drawing a single stroke at the present moment; and yet I feel that I never was a greater artist than now. 
When, while the lovely valley teems with vapour around me, and the meridian sun strikes the upper surface of the impenetrable foliage of my trees, and but a few stray gleams steal into the inner sanctuary, I throw myself down among the tall grass by the trickling stream; and, as I lie close to the earth, a thousand unknown plants are noticed by me: when I hear the buzz of the little world among the stalks, and grow familiar with the countless indescribable forms of the insects and flies, then I feel the presence of the Almighty, who formed us in his own image, and the breath of that universal love which bears and sustains us, as it floats around us in an eternity of bliss; and then, my friend, when darkness overspreads my eyes, and heaven and earth seem to dwell in my soul and absorb its power, like the form of a beloved mistress, then I often think with longing, Oh, would I could describe these conceptions, could impress upon paper all that is living so full and warm within me, that it might be the mirror of my soul, as my soul is the mirror of the infinite God! O my friend -- but it is too much for my strength -- I sink under the weight of the splendour of these visions! A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls like mine. I am so happy, my dear friend, so absorbed in the exquisite sense of mere tranquil existence, that I neglect my talents. I should be incapable of drawing a single stroke at the present moment; and yet I feel that I never was a greater artist than now. 
When, while the lovely valley teems with vapour around me, and the meridian sun strikes the upper surface of the impenetrable foliage of my trees, and but a few stray gleams steal into the inner sanctuary, I throw myself down among the tall grass by the trickling stream; and, as I lie close to the earth, a thousand unknown plants are noticed by me: when I hear the buzz of the little world among the stalks, and grow familiar with the countless indescribable forms of the insects and flies, then I feel the presence of the Almighty, who formed us in his own image, and the breath of that universal love which bears and sustains us, as it floats around us in an eternity of bliss; and then, my friend, when darkness overspreads my eyes, and heaven and earth seem to dwell in my soul and absorb its power, like the form of a beloved mistress, then I often think with longing, Oh, would I could describe these conceptions, could impress upon paper all that is living so full and warm within me, that it might be the mirror of my soul, as my soul is the mirror of the infinite God! O my friend -- but it is too much for my strength -- I sink under the weight of the splendour of these visions! A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls like mine. I am so happy, my dear friend, so absorbed in the exquisite sense of mere tranquil existence, that I neglect my talents. I should be incapable of drawing a single stroke at the present moment; and yet I feel that I never was a greater artist than now. 
When, while the lovely valley teems with vapour around me, and the meridian sun strikes the upper surface of the impenetrable foliage of my trees, and but a few stray gleams steal into the inner sanctuary, I throw myself down among the tall grass by the trickling stream; and, as I lie close to the earth, a thousand unknown plants are noticed by me: when I hear the buzz of the little world among the stalks, and grow familiar with the countless indescribable forms of the insects and flies, then I feel the presence of the Almighty, who formed us in his own image, and the breath of that universal love which bears and sustains us, as it floats around us in an eternity of bliss; and then, my friend, when darkness overspreads my eyes, and heaven and earth seem to dwell in my soul and absorb its power, like the form of a beloved mistress, then I often think with longing, Oh, would I could describe these conceptions, could impress upon paper all that is living so full and warm within me, that it might be the mirror of my soul, as my soul is the mirror of the infinite God! O my friend -- but it is too much for my strength -- I sink under the weight of the splendour of these visions!A wonderful serenity has taken possession of my entire soul, like these sweet mornings of spring which I enjoy with my whole heart. I am alone, and feel the charm of existence in this spot, which was created for the bliss of souls').split(" ") 43 | 44 | fn corpus4() raises -> List[String]: 45 | return String('Проснувшись однажды утром после беспокойного сна, Грегор Замза обнаружил, что он у себя в постели превратился в страшное насекомое. Лежа на панцирнотвердой спине, он видел, стоило ему приподнять голову, свой коричневый, выпуклый, разделенный дугообразными чешуйками живот, на верхушке которого еле держалось готовое вот-вот окончательно сползти одеяло. Его многочисленные, убого тонкие по сравнению с остальным телом ножки беспомощно копошились у него перед глазами. 
«Что со мной случилось?» – подумал он. Это не было сном. Его комната, настоящая, разве что слишком маленькая, но обычная комната, мирно покоилась в своих четырех хорошо знакомых стенах. Над столом, где были разложены распакованные образцы сукон – Замза был коммивояжером, – висел портрет, который он недавно вырезал из иллюстрированного журнала и вставил в красивую золоченую рамку. На портрете была изображена дама в меховой шляпе и боа, она сидела очень прямо и протягивала зрителю тяжелую меховую муфту, в которой целиком исчезала ее рука. Затем взгляд Грегора устремился в окно, и пасмурная погода – слышно было, как по жести подоконника стучат капли дождя – привела его и вовсе в грустное настроение. «Хорошо бы еще немного поспать и забыть всю эту чепуху», – подумал он, но это было совершенно неосуществимо, он привык спать на правом боку, а в теперешнем своем состоянии он никак не мог принять этого положения. С какой бы силой ни поворачивался он на правый бок, он неизменно сваливался опять на спину. Закрыв глаза, чтобы не видеть своих барахтающихся ног, он проделал это добрую сотню раз и отказался от этих попыток только тогда, когда почувствовал какую-то неведомую дотоле, тупую и слабую боль в боку. «Ах ты, господи, – подумал он, – какую я выбрал хлопотную профессию! Изо дня в день в разъездах. Деловых волнений куда больше, чем на месте, в торговом доме, а кроме того, изволь терпеть тяготы дороги, думай о расписании поездов, мирись с плохим, нерегулярным питанием, завязывай со все новыми и новыми людьми недолгие, никогда не бывающие сердечными отношения. Черт бы побрал все это!» Он почувствовал вверху живота легкий зуд; медленно подвинулся на спине к прутьям кровати, чтобы удобнее было поднять голову; нашел зудевшее место, сплошь покрытое, как оказалось, белыми непонятными точечками; хотел было ощупать это место одной из ножек, но сразу отдернул ее, ибо даже простое прикосновение вызвало у него, Грегора, озноб. Он соскользнул в прежнее свое положение. 
«От этого раннего вставания, – подумал он, – можно совсем обезуметь. Человек должен высыпаться. Другие коммивояжеры живут, как одалиски. Когда я, например, среди дня возвращаюсь в гостиницу, чтобы переписать полученные заказы, эти господа только завтракают. А осмелься я вести себя так, мои хозяин выгнал бы меня сразу. Кто знает, впрочем, может быть, это было бы даже очень хорошо для меня. Если бы я не сдерживался ради родителей, я бы давно заявил об уходе, я бы подошел к своему хозяину и выложил ему все, что о нем думаю. Он бы так и свалился с конторки! Странная у него манера – садиться на конторку и с ее высоты разговаривать со служащим, который вдобавок вынужден подойти вплотную к конторке из-за того, что хозяин туг на ухо. Однако надежда еще не совсем потеряна: как только я накоплю денег, чтобы выплатить долг моих родителей – на это уйдет еще лет пять-шесть, – я так и поступлю. Тут-то мы и распрощаемся раз и навсегда. А пока что надо подниматься, мой поезд отходит в пять». И он взглянул на будильник, который тикал на сундуке. «Боже правый!» – подумал он. Было половина седьмого, и стрелки спокойно двигались дальше, было даже больше половины, без малого уже три четверти. Неужели будильник не звонил? С кровати было видно, что он поставлен правильно, на четыре часа; и он, несомненно, звонил. Но как можно было спокойно спать под этот сотрясающий мебель трезвон? Ну, спал-то он неспокойно, но, видимо, крепко. Однако что делать теперь? Следующий поезд уходит в семь часов; чтобы поспеть на него, он должен отчаянно торопиться, а набор образцов еще не упакован, да и сам он отнюдь не чувствует себя свежим и легким на подъем. И даже поспей он на поезд, хозяйского разноса ему все равно не избежать – ведь рассыльный торгового дома дежурил у пятичасового поезда и давно доложил о его, Грегора, опоздании. Рассыльный, человек бесхарактерный и неумный, был ставленником хозяина. А что, если сказаться больным? 
Но это было бы крайне неприятно и показалось бы подозрительным, ибо за пятилетнюю свою службу Грегор ни разу еще не болел. Хозяин, конечно, привел бы врача больничной кассы и стал попрекать родителей сыном-лентяем, отводя любые возражения ссылкой на этого врача, по мнению которого все люди на свете совершенно здоровы и только не любят работать. И разве в данном случае он был бы так уж неправ? Если не считать сонливости, действительно странной после такого долгого сна, Грегор и в самом деле чувствовал себя превосходно и был даже чертовски голоден.Проснувшись однажды утром после беспокойного сна, Грегор Замза обнаружил, что он у себя в постели превратился в страшное насекомое. Лежа на панцирнотвердой спине, он видел, стоило ему приподнять голову, свой коричневый, выпуклый, разделенный дугообразными чешуйками живот, на верхушке которого еле держалось готовое вот-вот окончательно сползти одеяло. Его многочисленные, убого тонкие по сравнению с остальным телом ножки беспомощно копошились у него перед глазами. «Что со мной случилось?» – подумал он. Это не было сном. Его комната, настоящая, разве что слишком маленькая, но обычная комната, мирно покоилась в своих четырех хорошо знакомых стенах. Над столом, где были разложены распакованные образцы сукон – Замза был коммивояжером, – висел портрет, который он недавно вырезал из иллюстрированного журнала и вставил в красивую золоченую рамку. На портрете была изображена дама в меховой шляпе и боа, она сидела очень прямо и протягивала зрителю тяжелую меховую муфту, в которой целиком исчезала ее рука. Затем взгляд Грегора устремился в окно, и пасмурная погода – слышно было, как по жести подоконника стучат капли дождя – привела его и вовсе в грустное настроение. «Хорошо бы еще немного поспать и забыть всю эту чепуху», – подумал он, но это было совершенно неосуществимо, он привык спать на правом боку, а в теперешнем своем состоянии он никак не мог принять этого положения. 
С какой бы силой ни поворачивался он на правый бок, он неизменно сваливался опять на спину.').split(" ") 46 | 47 | fn corpus5() raises -> List[String]: 48 | return String('Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen letzten Blick zurück auf die Skyline seiner Heimatstadt Buchstabhausen, die Headline von Alphabetdorf und die Subline seiner eigenen Straße, der Zeilengasse. Wehmütig lief ihm eine rhetorische Frage über die Wange, dann setzte es seinen Weg fort. Unterwegs traf es eine Copy. Die Copy warnte das Blindtextchen, da, wo sie herkäme wäre sie zigmal umgeschrieben worden und alles, was von ihrem Ursprung noch übrig wäre, sei das Wort "und" und das Blindtextchen solle umkehren und wieder in sein eigenes, sicheres Land zurückkehren. Doch alles Gutzureden konnte es nicht überzeugen und so dauerte es nicht lange, bis ihm ein paar heimtückische Werbetexter auflauerten, es mit Longe und Parole betrunken machten und es dann in ihre Agentur schleppten, wo sie es für ihre Projekte wieder und wieder mißbrauchten. 
Und wenn es nicht umgeschrieben wurde, dann benutzen Sie es immernoch. Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen letzten Blick zurück auf die Skyline seiner Heimatstadt Buchstabhausen, die Headline von Alphabetdorf und die Subline seiner eigenen Straße, der Zeilengasse. Wehmütig lief ihm eine rhetorische Frage über die Wange, dann setzte es seinen Weg fort. Unterwegs traf es eine Copy. Die Copy warnte das Blindtextchen, da, wo sie herkäme wäre sie zigmal umgeschrieben worden und alles, was von ihrem Ursprung noch übrig wäre, sei das Wort "und" und das Blindtextchen solle umkehren und wieder in sein eigenes, sicheres Land zurückkehren. Doch alles Gutzureden konnte es nicht überzeugen und so dauerte es nicht lange, bis ihm ein paar heimtückische Werbetexter auflauerten, es mit Longe und Parole betrunken machten und es dann in ihre Agentur schleppten, wo sie es für ihre Projekte wieder und wieder mißbrauchten. Und wenn es nicht umgeschrieben wurde, dann benutzen Sie es immernoch. 
Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen letzten Blick zurück auf die Skyline seiner Heimatstadt Buchstabhausen, die Headline von Alphabetdorf und die Subline seiner eigenen Straße, der Zeilengasse. Wehmütig lief ihm eine rhetorische Frage über die Wange, dann setzte es seinen Weg fort. Unterwegs traf es eine Copy. Die Copy warnte das Blindtextchen, da, wo sie herkäme wäre sie zigmal umgeschrieben worden und alles, was von ihrem Ursprung noch übrig wäre, sei das Wort "und" und das Blindtextchen solle umkehren und wieder in sein eigenes, sicheres Land zurückkehren. Doch alles Gutzureden konnte es nicht überzeugen und so dauerte es nicht lange, bis ihm ein paar heimtückische Werbetexter auflauerten, es mit Longe und Parole betrunken machten und es dann in ihre Agentur schleppten, wo sie es für ihre Projekte wieder und wieder mißbrauchten. Und wenn es nicht umgeschrieben wurde, dann benutzen Sie es immernoch.Weit hinten, hinter den Wortbergen, fern der Länder Vokalien und Konsonantien leben die Blindtexte. 
Abgeschieden wohnen sie in Buchstabhausen an der Küste des Semantik, eines großen Sprachozeans. Ein kleines Bächlein namens Duden fließt durch ihren Ort und versorgt sie mit den nötigen Regelialien. Es ist ein paradiesmatisches Land, in dem einem gebratene Satzteile in den Mund fliegen. Nicht einmal von der allmächtigen Interpunktion werden die Blindtexte beherrscht – ein geradezu unorthographisches Leben. Eines Tages aber beschloß eine kleine Zeile Blindtext, ihr Name war Lorem Ipsum, hinaus zu gehen in die weite Grammatik. Der große Oxmox riet ihr davon ab, da es dort wimmele von bösen Kommata, wilden Fragezeichen und hinterhältigen Semikoli, doch das Blindtextchen ließ sich nicht beirren. Es packte seine sieben Versalien, schob sich sein Initial in den Gürtel und machte sich auf den Weg. Als es die ersten Hügel des Kursivgebirges erklommen hatte, warf es einen').split(" ") 49 | 50 | fn corpus6() raises -> List[String]: 51 | return String('米くを舵4物委らご氏松ハナテフ月関ソ時平ふいの博情れじフ牟万い元56園フメヤオ試図ロツヤ未備王こと傷喫羅踊んゆし。栃ユヱオ書著作ユソツロ英祉業ア大課ご権質フべ空8午キ切軟づン著郎そゃす格町採ヱオマコ処8付国ムハチア究表でなだ際無ロミヱ地兵ぴげ庭体すク発抜爆位や。楽富むゆず盛航カナセ携代ハ本高きた員59今骸ンラえぜ城解イケ穴訴ぽぎ属住ヤケトヌ抱点ト広注厚でて。 国リ出難セユメ軍手ヘカウ画形サヲシ猛85用ヲキミ心死よしと身処ケヨミオ教主ーぽ事業んく字国たさょ図能シミスヤ社8板ル岡世58次戒知院んれり。市メ誘根カ数問禁竹ゃれえみ給辺のでみき今二ぎさ裕止過こクすと無32郎所ラた生展ヌヘス成度慣葬勇厘ばてか。室ゃ下携疲ム色権がぽりっ銃週ノオ姫千テム健蔵い研手ッ放容ル告属め旅側26企サノヨ宅都福ぞ通待ちぴね種脳イど労希望義通むン。 罰しい続負せ著低たル異師ユハワ東添質コチ転集ルヤ雇聴約ヒ前統らた情厳ゆさでや真胸や有披暑棚豆ゆぼたけ。盛ワセロナ情競クるっわ講3音ずをせ少地めしぜょ手63明視れに判企ヒヌエソ求総58特本ね井比ユラキ禁頭馬るゅリす能率率かがさわ。葉サソ医郡ヱヘソ労帰ナケスミ救写ワヘ株審ネヒニミ安逮イ人画ラ涯車はラ極騒りなド件5級ンかふー劇41著ぱぐ凱討だ文世ぶづどま界善魅マ渓経競融れがや。 連ーぜらご模分ッ視外ばフく運発群ほぼづ育越一ほごクけ案募ヲイソ治会イせフ製君ぜた漢村1変リヒ構5際ツ御文ヲ臭入さドぼ代書ハケ引技ろみれ回観注倉徹ぱ。論ラづ海要サ情座ゃり齢宣ラモエ芸化エマホ覧催回ら戦69本外ト葬岳な政画か連針ぴリフず。約ル闘辺ぽ経2応掲ホサアラ塾小コラ画決クノオ上室レヌヱ勝逮ぜるえむ責豊チノ明意ひけ訟6碁草メタチエ財午召喝塊む。 決めでわ名金つけレわ続人県約ぽぼす尾腹ユサ戦載リシ護賀レモフツ重涯ニ治者むんっみ職更カタチレ提話2何ワ責東まけげふ能政ヌ供禁がびてわ提改倶れめ。読み担後ぽ安加ぎ論鹿ツ統最お気麻月つじもあ竜思いろめ判必満理トコ文連ムイウハ寄串ざほびー。文ゆこっ向27年メイ便能ノセヲ待1王スねたゆ伝派んね点過カト治読よにきべ使人スシ都言え阻8割べづえみ注引敷的岳犠眠どそ。 
学用イだ医客開ロ供界もぞだ実隆モイヌ務坂ナコヲ権野ろづ初場ぱ低会づぱじ新倒コ化政レ止奮浸猪ッわえづ。形いやリ要帰ほまむだ業領スル必打さ島14巻リ集日ネヘホタ面幅ち写上そぴ円図ムタコモ報使イわざと会催ヤヲ康証をドぶレ盤岡ホハツ作29管しをめ公問懐蓄っさ。来ゆぼあぱ投秋シ語右ぐ身靖かば辛握捕家記ヘワ神岐囲づ毘観メテクツ政73夕罪57需93誌飲査仁さ。 変レめ束球よんま会特ヱコ聞重だ史純ーどる件32浦レぴよゃ上強ネラリロ査従セユヤ専棋光レ作表ひぶ予正ぜーな誉確フス函6報円ス進治ね能営済否雄でわょ。42生型ば着続ア短実ぎおめび前環闘ラヤヲル診均っとにの声公トヱテマ整試椅情久妊舌頃ざとっく。品キチトテ阿国ラら受87世ヲフセリ川86個ーょぼげ危子ヘレカメ無会ぱかへ事通んかて電条ロツ徴商ぶぞそを居暑メ害広せもがり禁応レミヲ応響割壮憶はぱ。 千れンが織財メニ況界ネトレミ学豊フオホシ近月レたやご的罪ょな菱技ちる警栗エセ提89林危氷48参ア説森クキヒヱ薬社ホコエリ負和ルび紀下ケミイ掲歳特ごず扱底ク護木連ちクを各形ばすか。変ぱなれ町7融ヌ街準以タユヘム質裕ぶで遺語俊ぎずょ事金文キ写多山ーゆに歩帯すで会世クぜよ論写ヲ達71林危氷5間続ぎぜび高怠す。 係8青け応著ミ戦条ナヘネカ思79未ぎ算伊をゃ泉人ーづ需説っ畑鹿27軽ラソツ権2促千護ルロナカ開国ケ暴嶋ご池表だ。佐フナ訪麻はてせば勝効をあ医戦画とさわぴ者両すいあ並来んば載食ぴ件友頂業へえぞ魚祝ネラ聞率スコリケ始全ンこび夫出ドふ今布うぎふゅ実克即哉循やしんな。 暮す備54依紀てッん末刊と柔称むてス無府ケイ変壌をぱ汁連フマス海世ヌ中負知問ナヘケ純推ひ読着ヒ言若私軽れ。掲けフむ王本オコ線人をっさ必和断セソヲハ図芸ちかな防長りぶは投新意相ツ並5余セ職岳ぞ端古空援そ。森ヨエチ題5東っ自兄ち暴5近鹿横ト的京ハ安氷ナキ深際ぎ並節くスむの権工ほルせ京49効タムチ処三ぞぴラ済国ずっ文経ヘトミ水分準そが。').split(" ") 52 | 53 | fn corpus7() raises -> List[String]: 54 | return String('AbortMultipartUpload CompleteMultipartUpload CopyObject CreateBucket CreateMultipartUpload DeleteBucket DeleteBucketAnalyticsConfiguration DeleteBucketCors DeleteBucketEncryption DeleteBucketIntelligentTieringConfiguration DeleteBucketInventoryConfiguration DeleteBucketLifecycle DeleteBucketMetricsConfiguration DeleteBucketOwnershipControls DeleteBucketPolicy DeleteBucketReplication DeleteBucketTagging DeleteBucketWebsite DeleteObject DeleteObjects DeleteObjectTagging DeletePublicAccessBlock GetBucketAccelerateConfiguration GetBucketAcl GetBucketAnalyticsConfiguration GetBucketCors GetBucketEncryption GetBucketIntelligentTieringConfiguration GetBucketInventoryConfiguration GetBucketLifecycle GetBucketLifecycleConfiguration GetBucketLocation GetBucketLogging GetBucketMetricsConfiguration GetBucketNotification GetBucketNotificationConfiguration GetBucketOwnershipControls GetBucketPolicy GetBucketPolicyStatus GetBucketReplication GetBucketRequestPayment GetBucketTagging GetBucketVersioning GetBucketWebsite GetObject GetObjectAcl GetObjectAttributes GetObjectLegalHold 
GetObjectLockConfiguration GetObjectRetention GetObjectTagging GetObjectTorrent GetPublicAccessBlock HeadBucket HeadObject ListBucketAnalyticsConfigurations ListBucketIntelligentTieringConfigurations ListBucketInventoryConfigurations ListBucketMetricsConfigurations ListBuckets ListMultipartUploads ListObjects ListObjectsV2 ListObjectVersions ListParts PutBucketAccelerateConfiguration PutBucketAcl PutBucketAnalyticsConfiguration PutBucketCors PutBucketEncryption PutBucketIntelligentTieringConfiguration PutBucketInventoryConfiguration PutBucketLifecycle PutBucketLifecycleConfiguration PutBucketLogging PutBucketMetricsConfiguration PutBucketNotification PutBucketNotificationConfiguration PutBucketOwnershipControls PutBucketPolicy PutBucketReplication PutBucketRequestPayment PutBucketTagging PutBucketVersioning PutBucketWebsite PutObject PutObjectAcl PutObjectLegalHold PutObjectLockConfiguration PutObjectRetention PutObjectTagging PutPublicAccessBlock RestoreObject SelectObjectContent UploadPart UploadPartCopy WriteGetObjectResponse CreateAccessPoint CreateAccessPointForObjectLambda CreateBucket CreateJob CreateMultiRegionAccessPoint DeleteAccessPoint DeleteAccessPointForObjectLambda DeleteAccessPointPolicy DeleteAccessPointPolicyForObjectLambda DeleteBucket DeleteBucketLifecycleConfiguration DeleteBucketPolicy DeleteBucketReplication DeleteBucketTagging DeleteJobTagging DeleteMultiRegionAccessPoint DeletePublicAccessBlock DeleteStorageLensConfiguration DeleteStorageLensConfigurationTagging DescribeJob DescribeMultiRegionAccessPointOperation GetAccessPoint GetAccessPointConfigurationForObjectLambda GetAccessPointForObjectLambda GetAccessPointPolicy GetAccessPointPolicyForObjectLambda GetAccessPointPolicyStatus GetAccessPointPolicyStatusForObjectLambda GetBucket GetBucketLifecycleConfiguration GetBucketPolicy GetBucketReplication GetBucketTagging GetBucketVersioning GetJobTagging GetMultiRegionAccessPoint GetMultiRegionAccessPointPolicy 
GetMultiRegionAccessPointPolicyStatus GetMultiRegionAccessPointRoutes GetPublicAccessBlock GetStorageLensConfiguration GetStorageLensConfigurationTagging ListAccessPoints ListAccessPointsForObjectLambda ListJobs ListMultiRegionAccessPoints ListRegionalBuckets ListStorageLensConfigurations PutAccessPointConfigurationForObjectLambda PutAccessPointPolicy PutAccessPointPolicyForObjectLambda PutBucketLifecycleConfiguration PutBucketPolicy PutBucketReplication PutBucketTagging PutBucketVersioning PutJobTagging PutMultiRegionAccessPointPolicy PutPublicAccessBlock PutStorageLensConfiguration PutStorageLensConfigurationTagging SubmitMultiRegionAccessPointRoutes UpdateJobPriority UpdateJobStatus').split(" ") 55 | # Reads the file /usr/share/dict/words and returns its contents one line per entry; raises if the read fails. 56 | fn corpus8() raises -> List[String]: 57 | var words_file = Path("/usr/share/dict/words") 58 | var contents = words_file.read_text() 59 | return contents.splitlines() --------------------------------------------------------------------------------