├── src ├── keygen │ ├── .gitignore │ ├── Cargo.toml │ ├── Cargo.lock │ └── src │ │ └── main.rs ├── sepe-runner │ ├── .gitignore │ ├── Cargo.toml │ ├── Cargo.lock │ └── src │ │ └── main.rs ├── keysynth │ ├── .gitignore │ └── Makefile ├── keybuilder │ ├── .gitignore │ ├── Makefile │ └── src │ │ └── main.c └── keyuser │ ├── .gitignore │ ├── src │ ├── google-hashes │ │ ├── citycrc.hpp │ │ ├── city.hpp │ │ └── farm.hpp │ ├── gperf-hashes │ │ ├── gperf-hashes.h │ │ ├── url_fixed_pattern.c │ │ ├── cpf.c │ │ ├── mac.c │ │ ├── ipv6.c │ │ ├── url_complex.c │ │ ├── ipv4.c │ │ ├── ssn.c │ │ └── ints.c │ ├── registry.hpp │ ├── main.cpp │ ├── customHashes.hpp │ ├── benchmarks.cpp │ └── benchmarks.hpp │ └── Makefile ├── scripts ├── .gitignore ├── align_csv.sh ├── make_hash_from_regex.sh ├── install_abseil.sh ├── distribution_benchmark.sh ├── benchmark.sh ├── keyuser_interpreter.py └── global_keyuser_interpreter.py ├── assets └── images │ └── SepeBanner.png ├── .gitignore ├── results └── gpt-prompts │ ├── ssn.txt │ ├── url_fixed_pattern.txt │ ├── cpf.txt │ ├── ipv4.txt │ ├── url_complex.txt │ ├── ints.txt │ ├── ipv6.txt │ └── mac.txt ├── Regexes.toml ├── Makefile └── README.md /src/keygen/.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | -------------------------------------------------------------------------------- /src/sepe-runner/.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | -------------------------------------------------------------------------------- /src/keysynth/.gitignore: -------------------------------------------------------------------------------- 1 | keysynth 2 | keysynth-debug 3 | -------------------------------------------------------------------------------- /src/keybuilder/.gitignore: -------------------------------------------------------------------------------- 1 | keybuilder 2 | keybuilder-debug 3 | 
-------------------------------------------------------------------------------- /src/keyuser/.gitignore: -------------------------------------------------------------------------------- 1 | keyuser 2 | keyuser-debug 3 | obj/ 4 | .vscode 5 | -------------------------------------------------------------------------------- /scripts/.gitignore: -------------------------------------------------------------------------------- 1 | array*.py 2 | *.png 3 | *.svg 4 | __pycache__/ 5 | *histogram* 6 | *.csv 7 | -------------------------------------------------------------------------------- /assets/images/SepeBanner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lac-dcc/sepe/HEAD/assets/images/SepeBanner.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | *.csv 3 | bin/ 4 | output/ 5 | results/ 6 | deps/ 7 | __pycache__/ 8 | *.png 9 | *.svg 10 | *_distribution.py 11 | -------------------------------------------------------------------------------- /scripts/align_csv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Prints the CSV file given as $1 with aligned columns: first line first, remaining lines sorted from the 8th field on. 3 | set -e 4 | 5 | if [ $# -lt 1 ]; then 6 | echo "usage: $0 CSV_FILE" 7 | exit 1 8 | fi 9 | 10 | ALIGNED="$(column -o' ' -t -s, "$1")" 11 | echo "$ALIGNED" | head -n 1 12 | echo "$ALIGNED" | tail -n +2 | sort -k 8 13 | -------------------------------------------------------------------------------- /src/sepe-runner/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sepe-runner" 3 | version = "0.1.0" 4 | edition = "2021" 5 | authors = ["Leonardo G. Faé", "Renato B. 
Hoffmann"] 6 | 7 | [dependencies] 8 | clap = { version = "4.4", features = ["derive"] } 9 | toml = {version = "0.8", default-features = false, features = ["parse"] } 10 | -------------------------------------------------------------------------------- /scripts/make_hash_from_regex.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Builds keygen, keybuilder and keysynth, then pipes keygen's output for $1 through keybuilder and passes the result to keysynth. 3 | set -e 4 | 5 | if [ $# -lt 1 ]; then 6 | echo "usage: $0 REGEX" 7 | exit 1 8 | fi 9 | 10 | if [ "$(basename "$(pwd)")" = "scripts" ]; then 11 | cd .. 12 | fi 13 | 14 | make --silent -j"$(nproc)" bin/keygen bin/keybuilder bin/keysynth 15 | ./bin/keysynth "$(./bin/keygen "$1" | ./bin/keybuilder)" 16 | # ./bin/keygen "$1" | ./bin/keybuilder 17 | -------------------------------------------------------------------------------- /src/keygen/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "keygen" 3 | version = "0.1.0" 4 | edition = "2021" 5 | authors = ["Leonardo G. Faé", "Renato B. 
Hoffmann"] 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | clap = { version = "4.4", features = ["derive"] } 11 | rand = { version = "0.8", default-features = false, features = ["std_rng"] } 12 | rand_distr = { version = "0.4" } 13 | -------------------------------------------------------------------------------- /src/keybuilder/Makefile: -------------------------------------------------------------------------------- 1 | # Compilation variables 2 | COMMON_FLAGS := -std=c18 -Wall -Wextra -pedantic -pipe 3 | RELEASE_FLAGS := $(COMMON_FLAGS) -O2 -flto=auto -march=native 4 | DEBUG_FLAGS := $(COMMON_FLAGS) -Og -g3 -fsanitize=address,undefined 5 | 6 | all: keybuilder Makefile 7 | 8 | keybuilder-debug: src/main.c Makefile 9 | $(CC) $(CFLAGS) $(DEBUG_FLAGS) -o $@ $< -lm 10 | 11 | keybuilder: src/main.c Makefile 12 | $(CC) $(CFLAGS) $(RELEASE_FLAGS) -o $@ $< -lm 13 | 14 | clean: 15 | rm -vf keybuilder keybuilder-debug 16 | 17 | .PHONY: all clean 18 | -------------------------------------------------------------------------------- /src/keysynth/Makefile: -------------------------------------------------------------------------------- 1 | TARGET=keysynth 2 | 3 | # Compilation variables 4 | COMMON_FLAGS := -std=c++20 -Wall -Wextra -pedantic 5 | RELEASE_FLAGS := $(COMMON_FLAGS) -O2 -pipe -flto=auto -march=native 6 | DEBUG_FLAGS := $(COMMON_FLAGS) -Og -g3 -fsanitize=address,undefined -mbmi2 -msse2 -msse3 7 | 8 | all: $(TARGET) 9 | 10 | keysynth-debug: src/main.cpp 11 | $(CXX) $(CXXFLAGS) $(DEBUG_FLAGS) -o $@ $^ 12 | 13 | keysynth: src/main.cpp 14 | $(CXX) $(CXXFLAGS) $(RELEASE_FLAGS) -o $@ $^ 15 | 16 | clean: 17 | rm -vf keysynth keysynth-debug 18 | 19 | .PHONY: all clean 20 | -------------------------------------------------------------------------------- /scripts/install_abseil.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 
| 5 | ABSEIL_PATH="../deps/" 6 | 7 | # Parse arguments 8 | if [ $# -eq 1 ]; then 9 | ABSEIL_PATH=$1 10 | else 11 | mkdir -p ../deps 12 | fi 13 | 14 | # Check if directory exists 15 | #if [ ! -d "$ABSEIL_PATH" ]; then 16 | # echo "Installation directory $ABSEIL_PATH DOES NOT exists!" 17 | # exit 1 18 | #fi 19 | 20 | # Convert into absolute path 21 | ABSEIL_PATH=$(cd "$ABSEIL_PATH"; pwd) 22 | ABSEIL_PATH="${ABSEIL_PATH}/abseil/" 23 | 24 | echo "Installation path for Abseil: $ABSEIL_PATH" 25 | 26 | CURRENT_DIR=$(pwd) 27 | 28 | mkdir -p "${ABSEIL_PATH}" 29 | git clone https://github.com/abseil/abseil-cpp.git 30 | cd abseil-cpp/ && mkdir build && cd build 31 | cmake \ 32 | -DBUILD_TESTING=OFF \ 33 | -DABSL_BUILD_TESTING=OFF \ 34 | -DABSL_USE_GOOGLETEST_HEAD=OFF \ 35 | -DCMAKE_CXX_STANDARD=17 \ 36 | -DCMAKE_INSTALL_PREFIX="$ABSEIL_PATH" \ 37 | .. 38 | cmake --build . --target install -j"$(nproc)" 39 | cd "$CURRENT_DIR" 40 | rm -rf abseil-cpp 41 | -------------------------------------------------------------------------------- /results/gpt-prompts/ssn.txt: -------------------------------------------------------------------------------- 1 | For a hash function, assume that keys are SSN, always in the format 'nnn-nn-nnnn', where all characters are digits. The '-' character is a constant, so you can ignore it in your hash function. The fixed key size is 11 characters. 2 | 3 | The code is C++, and the keys are std::strings. Do not use std::hash. Assume you do not need to assert key format. 4 | 5 | Produce an optimized hash function for this specific case with an unrolled for loop, and also consider that the constant character is always the same and in the same position. 6 | 7 | ChatGPT 8 | Certainly! 
Below is an example of an optimized hash function for this specific case using an unrolled for loop in C++: 9 | 10 | cpp 11 | Copy code 12 | #include 13 | 14 | size_t customHash(const std::string& key) { 15 | constexpr size_t keySize = 11; 16 | size_t hashValue = 0; 17 | 18 | // Unrolled for loop for better performance 19 | for (size_t i = 0; i < keySize; i += 3) { 20 | hashValue = 37 * hashValue + static_cast(key[i] - '0'); 21 | hashValue = 37 * hashValue + static_cast(key[i + 1] - '0'); 22 | hashValue = 37 * hashValue + static_cast(key[i + 2] - '0'); 23 | } 24 | 25 | return hashValue; 26 | } 27 | This hash function takes advantage of the fixed key size and the fact that the '-' character is constant and in the same position. The loop is unrolled by three, as each group of three characters represents one part of the SSN. The multiplication by 37 is a common practice to mix the bits well, and subtracting '0' converts the character to its numeric value. This implementation should provide decent performance for this specific case. -------------------------------------------------------------------------------- /Regexes.toml: -------------------------------------------------------------------------------- 1 | # Regexes with \. should be \\. 
inside the TOML parser 2 | 3 | [URL_COMPLEX] 4 | regex = "http:/google\\.github\\.io/[a-z]{10}/version[0-9]{3}/doxygen/html/[a-z0-9]{20}\\.html" 5 | hashes = ["STDHashSrc", "AbseilHash", "CityHash", "FNVHash", "PextUrlComplex", "OffXorUrlComplex", "GptUrlComplex", "GperfUrlComplex", "NaiveUrlComplex" , "AesUrlComplex"] 6 | 7 | [URL_FIXED_PATTERN] 8 | regex = "http:/google\\.github\\.io/farmhash/doxygen/html/[a-z0-9]{20}\\.html" 9 | hashes = ["STDHashSrc", "AbseilHash", "CityHash", "FNVHash", "PextUrl", "OffXorUrl", "NaiveUrl", "GptUrl", "GperfUrl", "AesUrl"] 10 | 11 | [MAC] 12 | regex = "([0-9a-fA-F]{2}-){5}[0-9a-fA-F]{2}" 13 | hashes = ["STDHashSrc", "AbseilHash", "CityHash", "FNVHash", "PextMac", "OffXorMac", "NaiveMac", "GptMac", "GperfMac", "AesMac"] 14 | 15 | [CPF] 16 | regex = "(([0-9]{3})\\.){2}[0-9]{3}-[0-9]{2}" 17 | hashes = ["STDHashSrc", "AbseilHash", "CityHash", "FNVHash", "PextCPF", "OffXorCPF", "NaiveCPF", "GptCPF", "GperfCPF", "AesCPF"] 18 | 19 | [SSN] 20 | regex = "[0-9]{3}-[0-9]{2}-[0-9]{4}" 21 | hashes = ["STDHashSrc", "AbseilHash", "CityHash", "FNVHash", "PextSSN", "OffXorSSN", "NaiveSSN", "GptSSN", "GperfSSN", "AesSSN"] 22 | 23 | [IPV4] 24 | regex = "(([0-9]{3})\\.){3}[0-9]{3}" 25 | hashes = ["STDHashSrc", "AbseilHash", "CityHash", "FNVHash", "PextIPV4", "OffXorIPV4", "NaiveIPV4", "GptIPV4", "GperfIPV4", "AesIPV4"] 26 | 27 | [IPV6] 28 | regex = "([0-9a-fA-F]{4}:){7}[0-9a-fA-F]{4}" 29 | hashes = ["STDHashSrc", "AbseilHash", "CityHash", "FNVHash", "PextIPV6", "OffXorIPV6", "NaiveIPV6", "GptIPV6", "GperfIPV6", "AesIPV6"] 30 | 31 | [INTS] 32 | regex = "[0-9]{100}" 33 | hashes = ["STDHashSrc", "AbseilHash", "CityHash", "FNVHash", "PextINTS", "OffXorINTS", "NaiveINTS", "GptINTS", "GperfINTS", "AesINTS"] 34 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: bin bin/keygen bin/keybuilder bin/keysynth 2 | 3 | benchmark: 
bin/keyuser bin/sepe-runner 4 | 5 | keyuser: bin/keyuser 6 | 7 | bin: 8 | @mkdir -p bin/ 9 | 10 | bin/keygen: $(shell find src/keygen/src/ -type f) 11 | cd src/keygen && cargo build --release 12 | cp src/keygen/target/release/keygen $@ 13 | # Sub-builds go through $(MAKE) so that -j and other flags reach the nested make. 14 | bin/keyuser: $(shell find src/keyuser/src/ -type f) src/keyuser/Makefile 15 | $(MAKE) -C src/keyuser keyuser 16 | cp src/keyuser/keyuser $@ 17 | 18 | bin/keyuser-debug: $(shell find src/keyuser/src/ -type f) src/keyuser/Makefile 19 | $(MAKE) -C src/keyuser keyuser-debug 20 | cp src/keyuser/keyuser-debug $@ 21 | 22 | bin/keybuilder: $(shell find src/keybuilder/src/ -type f) src/keybuilder/Makefile 23 | $(MAKE) -C src/keybuilder keybuilder 24 | cp src/keybuilder/keybuilder $@ 25 | 26 | bin/keybuilder-debug: $(shell find src/keybuilder/src/ -type f) src/keybuilder/Makefile 27 | $(MAKE) -C src/keybuilder keybuilder-debug 28 | cp src/keybuilder/keybuilder-debug $@ 29 | 30 | bin/keysynth: $(shell find src/keysynth/src/ -type f) src/keysynth/Makefile 31 | $(MAKE) -C src/keysynth keysynth 32 | cp src/keysynth/keysynth $@ 33 | # Debug variant: install the keysynth-debug binary that the sub-make just built, not the release one. 34 | bin/keysynth-debug: $(shell find src/keysynth/src/ -type f) src/keysynth/Makefile 35 | $(MAKE) -C src/keysynth keysynth-debug 36 | cp src/keysynth/keysynth-debug $@ 37 | 38 | bin/sepe-runner: $(shell find src/sepe-runner/src/ -type f) 39 | cd src/sepe-runner && cargo build --release 40 | cp src/sepe-runner/target/release/sepe-runner $@ 41 | 42 | clean: 43 | cd src/keygen && cargo clean 44 | cd src/sepe-runner && cargo clean 45 | $(MAKE) -C src/keyuser clean 46 | $(MAKE) -C src/keysynth clean 47 | $(MAKE) -C src/keybuilder clean 48 | rm -rfv bin output *.csv 49 | 50 | results-clear: 51 | $(MAKE) -C results/ clear 52 | # Targets below never produce a file of the same name. 53 | .PHONY: all benchmark keyuser clean results-clear 54 | -------------------------------------------------------------------------------- /scripts/distribution_benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | if [ "$(basename "$(pwd)")" = "scripts" ]; then 6 | cd .. 
7 | fi 8 | 9 | if [ ! -d output ]; then 10 | mkdir output 11 | fi 12 | 13 | make -j"$(nproc)" 14 | make -j"$(nproc)" benchmark 15 | 16 | NUM_OPS=10000 17 | NUM_KEYS="10000" 18 | REGEXES="$(sed -n 's/^\[\(.*\)\]/\1/p' Regexes.toml)" 19 | REPETITIONS=1 20 | DISTRIBUTIONS=" 21 | 0.7 0.2 0.1 22 | 0.6 0.2 0.2 23 | 0.4 0.3 0.3 24 | " 25 | DISTRIBUTIONS_COUNT="$(echo "$DISTRIBUTIONS" | wc -w)" 26 | 27 | HISTOGRAM_DISTRIBUTION="normal uniform incremental" 28 | 29 | for HD in $HISTOGRAM_DISTRIBUTION; do 30 | for REGEX in $REGEXES; do 31 | COUNT=0 32 | for NUM_OP in $NUM_OPS; do 33 | for NUM_KEY in $NUM_KEYS; do 34 | for ARG in $(seq 1 3 "$DISTRIBUTIONS_COUNT"); do 35 | 36 | INSERTION="$( echo "$DISTRIBUTIONS" | tr '\n' ' ' | awk "{print \$$((ARG + 0))}")" 37 | SEARCH="$( echo "$DISTRIBUTIONS" | tr '\n' ' ' | awk "{print \$$((ARG + 1))}")" 38 | ELIMINATION="$(echo "$DISTRIBUTIONS" | tr '\n' ' ' | awk "{print \$$((ARG + 2))}")" 39 | 40 | ./bin/sepe-runner \ 41 | --histogram \ 42 | --distribution "$HD" \ 43 | --verbose \ 44 | --operations "$NUM_OP" \ 45 | --keys "$NUM_KEY" \ 46 | --insert "$INSERTION" \ 47 | --search "$SEARCH" \ 48 | --elimination "$ELIMINATION" \ 49 | --repetitions $REPETITIONS \ 50 | --outfile "${COUNT}.csv" \ 51 | "$REGEX" 52 | 53 | COUNT=$((COUNT + 1)) 54 | done 55 | done 56 | done 57 | done 58 | mv -v ./*.csv output/ 59 | zip -9 -o "$(date '+%Y-%m-%d_%Hh-%Mm-%Ss')""-histogram-${HD}.zip" -r output/* 60 | done 61 | -------------------------------------------------------------------------------- /results/gpt-prompts/url_fixed_pattern.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | User 4 | For a hash function, assume that keys are URL, always in the format 'http:/google\.github\.io/farmhash/doxygen/html/[a-z0-9]{20}\.html', where all characters are alphanumerical. All characters outside the [a-z0-9]{20} range are constant, so you can ignore them in your hash function. The fixed key size is 70 characters. 
5 | 6 | The code is C++, and the keys are std::strings. Do not use std::hash. Assume you do not need to assert key format. 7 | 8 | Produce an optimized hash function for this specific case with an unrolled for loop, and also consider that the constant character is always the same and in the same position. 9 | 10 | ChatGPT 11 | Certainly! Here's an example of an optimized hash function in C++ for your specific case using an unrolled for loop. The constant character outside the [a-z0-9]{20} range is ignored, and only the characters within the specified range are considered for the hash calculation: 12 | 13 | cpp 14 | Copy code 15 | #include 16 | 17 | std::size_t customHash(const std::string& key) { 18 | constexpr std::size_t keySize = 70; 19 | constexpr std::size_t unrollFactor = 5; // Adjust as needed 20 | 21 | std::size_t hashValue = 0; 22 | 23 | // Unrolled loop for better performance 24 | for (std::size_t i = 0; i < keySize; i += unrollFactor) { 25 | hashValue ^= (static_cast(key[i]) << 0) | 26 | (static_cast(key[i + 1]) << 8) | 27 | (static_cast(key[i + 2]) << 16) | 28 | (static_cast(key[i + 3]) << 24) | 29 | (static_cast(key[i + 4]) << 32); 30 | } 31 | 32 | return hashValue; 33 | } 34 | In this example, the unrolled loop processes the string in chunks of 5 characters at a time (you can adjust unrollFactor as needed for your specific case). The XOR operation is used to combine the hash values of each chunk. This unrolling can help improve performance by reducing loop control overhead. 35 | -------------------------------------------------------------------------------- /src/keyuser/src/google-hashes/citycrc.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 Google, Inc. 
2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | // 21 | // CityHash, by Geoff Pike and Jyrki Alakuijala 22 | // 23 | // This file declares the subset of the CityHash functions that require 24 | // _mm_crc32_u64(). See the CityHash README for details. 25 | // 26 | // Functions in the CityHash family are not suitable for cryptography. 27 | 28 | #ifndef CITY_HASH_CRC_H_ 29 | #define CITY_HASH_CRC_H_ 30 | 31 | #include "city.hpp" 32 | 33 | // Hash function for a byte array. 34 | uint128 CityHashCrc128(const char *s, size_t len); 35 | 36 | // Hash function for a byte array. For convenience, a 128-bit seed is also 37 | // hashed into the result. 38 | uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed); 39 | 40 | // Hash function for a byte array. Sets result[0] ... result[3]. 
41 | void CityHashCrc256(const char *s, size_t len, uint64 *result); 42 | 43 | #endif // CITY_HASH_CRC_H_ 44 | -------------------------------------------------------------------------------- /src/keyuser/Makefile: -------------------------------------------------------------------------------- 1 | TARGET=keyuser 2 | 3 | # Directories 4 | SRC_DIR := src 5 | OBJ_DIR := obj 6 | 7 | # Find all .cpp and .c files in the project 8 | SRCS := $(shell find . \( -name "*.cpp" -o -name "*.c" \) -printf "%P\n") 9 | OBJS := $(shell find . \( -name "*.cpp" -o -name "*.c" \) -exec basename {} \;) 10 | OBJS := $(patsubst %.cpp,$(OBJ_DIR)/%.o,$(patsubst %.c,$(OBJ_DIR)/%.o,$(OBJS))) 11 | DEBUG_OBJS := $(patsubst %.cpp,$(OBJ_DIR)/debug_%.o,$(patsubst %.c,$(OBJ_DIR)/debug_%.o,$(SRCS))) 12 | 13 | # Make, please look into subdirectories :) 14 | vpath %.cpp $(shell find $(SRC_DIR) -type d) 15 | vpath %.c $(shell find $(SRC_DIR) -type d) 16 | 17 | # ABSL 18 | ABSL_DIR := $(shell realpath ../../deps/abseil) 19 | ABSL_LIB := $(ABSL_DIR)/lib 20 | ABSL_INC := $(ABSL_DIR)/include 21 | ABSL_LIB_FILES :=-labsl_hash -labsl_city -labsl_low_level_hash 22 | 23 | ifeq ($(PLATFORM),x86_64) 24 | INTRINSICS=-mbmi2 -msse2 -msse3 25 | endif 26 | 27 | ifeq ($(PLATFORM),aarch64) 28 | INTRINSICS=-march=armv8-a+crypto 29 | endif 30 | 31 | # Compilation variables 32 | CXXFLAGS += -std=c++20 33 | COMMON_FLAGS := -Wall -Wextra -pedantic -I$(ABSL_INC) -L$(ABSL_LIB) -Igperf_hashes/ 34 | RELEASE_FLAGS := $(COMMON_FLAGS) -O2 -pipe -flto=auto -march=native $(INTRINSICS) 35 | DEBUG_FLAGS := $(COMMON_FLAGS) -Og -g3 -fsanitize=address,undefined $(INTRINSICS) 36 | 37 | all: $(TARGET) 38 | 39 | # CXX objects 40 | $(OBJ_DIR)/debug_%.o: %.cpp $(OBJ_DIR) Makefile 41 | $(CXX) $(CXXFLAGS) $(DEBUG_FLAGS) -c $< -o $@ 42 | $(OBJ_DIR)/%.o: %.cpp $(OBJ_DIR) Makefile 43 | $(CXX) $(CXXFLAGS) $(RELEASE_FLAGS) -c $< -o $@ 44 | 45 | # CC objects 46 | $(OBJ_DIR)/debug_%.o: %.c $(OBJ_DIR) Makefile 47 | $(CC) $(CFLAGS) $(DEBUG_FLAGS) -c $< 
-o $@ 48 | $(OBJ_DIR)/%.o: %.c $(OBJ_DIR) Makefile 49 | $(CC) $(CFLAGS) $(RELEASE_FLAGS) -c $< -o $@ 50 | 51 | $(OBJ_DIR): 52 | @mkdir -p obj/ 53 | 54 | keyuser-debug: $(DEBUG_OBJS) 55 | $(CXX) $(CXXFLAGS) $(DEBUG_FLAGS) -o $@ $^ $(ABSL_LIB_FILES) 56 | 57 | keyuser: $(OBJS) 58 | $(CXX) $(CXXFLAGS) $(RELEASE_FLAGS) -o $@ $^ $(ABSL_LIB_FILES) 59 | 60 | clean: 61 | rm -vf keyuser keyuser-debug \ 62 | $(DEBUG_OBJS) \ 63 | $(OBJS) 64 | 65 | .PHONY: all clean tests 66 | -------------------------------------------------------------------------------- /results/gpt-prompts/cpf.txt: -------------------------------------------------------------------------------- 1 | User 2 | For a hash function, assume that keys are Brazilian CPF, always in the format 'nnn.nnn.nnn-nn', where all characters are digits. The '-' and '.' characters are constant, so you can ignore them in your hash function. The fixed key size is 14 characters. 3 | 4 | The code is C++, and the keys are std::strings. Do not use std::hash. Assume you do not need to assert key format. 5 | 6 | Produce an optimized hash function for this specific case with an unrolled for loop, and also consider that the constant character is always the same and in the same position. 7 | 8 | ChatGPT 9 | Certainly! To create an optimized hash function for the given case (Brazilian CPF keys with a fixed format), you can use a simple unrolled loop along with a mix of prime numbers for the hash. 
Here's an example implementation in C++: 10 | 11 | cpp 12 | Copy code 13 | #include 14 | 15 | std::size_t customHash(const std::string& key) { 16 | const std::size_t prime = 31; // A prime number for hashing 17 | 18 | // Unrolled loop for calculating the hash 19 | std::size_t hashValue = 0; 20 | hashValue += key[0] * 1000000000000ULL; 21 | hashValue += key[1] * 100000000000ULL; 22 | hashValue += key[2] * 10000000000ULL; 23 | hashValue += key[4] * 1000000000ULL; 24 | hashValue += key[5] * 100000000ULL; 25 | hashValue += key[6] * 10000000ULL; 26 | hashValue += key[8] * 1000000ULL; 27 | hashValue += key[9] * 100000ULL; 28 | hashValue += key[10] * 10000ULL; 29 | hashValue += key[11] * 1000ULL; 30 | hashValue += key[12] * 100ULL; 31 | hashValue += key[13] * 10ULL; 32 | hashValue += key[14] * 1ULL; 33 | 34 | // Mixing with a prime number 35 | hashValue = (hashValue * prime); 36 | 37 | return hashValue; 38 | } 39 | 40 | int main() { 41 | std::string cpf = "123.456.789-09"; 42 | std::size_t hashValue = customHash(cpf); 43 | // Use hashValue as needed... 44 | return 0; 45 | } 46 | This hash function unrolls the loop, considering that the constant characters ('.' and '-') are ignored, and calculates the hash based on the position of each digit in the CPF. The use of prime numbers in the mixing step helps improve the distribution of the hash values. Adjust the prime number and the unrolled loop positions as needed for further optimization. 47 | -------------------------------------------------------------------------------- /results/gpt-prompts/ipv4.txt: -------------------------------------------------------------------------------- 1 | For a hash function, assume that keys are IPV4, always in the format '123.456.789.123', where all characters are digits. The '.' character is a constant, so you can ignore it in your hash function. The fixed key size is 15 characters. 2 | 3 | The code is C++, and the keys are std::strings. Do not use std::hash. 
Assume you do not need to assert key format. 4 | 5 | Produce an optimized hash function for this specific case with an unrolled for loop, and also consider that the constant character is always the same and in the same position. 6 | ChatGPT 7 | Certainly! Here's an optimized hash function for the given case in C++ using an unrolled for loop: 8 | 9 | cpp 10 | Copy code 11 | #include 12 | 13 | // Custom hash function for IPV4 keys 14 | std::size_t customHash(const std::string& key) { 15 | // Assuming key.size() is always 15 16 | const char constantChar = '.'; // Constant character 17 | 18 | // Unrolled for loop for better performance 19 | std::size_t hashValue = 0; 20 | hashValue += static_cast(key[0]) * 31; 21 | hashValue += static_cast(key[1]) * 37; 22 | hashValue += static_cast(key[2]) * 41; 23 | hashValue += static_cast(key[3]) * 43; 24 | hashValue += static_cast(key[4]) * 47; 25 | hashValue += static_cast(key[5]) * 53; 26 | hashValue += static_cast(key[6]) * 59; 27 | hashValue += static_cast(key[7]) * 61; 28 | hashValue += static_cast(key[8]) * 67; 29 | hashValue += static_cast(key[9]) * 71; 30 | hashValue += static_cast(key[10]) * 73; 31 | hashValue += static_cast(key[11]) * 79; 32 | hashValue += static_cast(key[12]) * 83; 33 | hashValue += static_cast(key[13]) * 89; 34 | hashValue += static_cast(key[14]) * 97; 35 | 36 | return hashValue; 37 | } 38 | 39 | int main() { 40 | std::string ipv4Key = "123.456.789.123"; 41 | std::size_t hashValue = customHash(ipv4Key); 42 | 43 | // Use hashValue as needed 44 | return 0; 45 | } 46 | This hash function uses an unrolled for loop to calculate the hash value by considering each character in the key. Adjust the prime numbers in the multiplication to suit your specific needs and ensure a good distribution of hash values. 
47 | 48 | 49 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /src/keyuser/src/gperf-hashes/gperf-hashes.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file gperf-hashes.h 3 | * @brief This file contains a collection of hash functions generated by gperf using 1000 keys generated by keygen using the default seed. 4 | */ 5 | 6 | #ifndef GPERF_HASHES_HPP 7 | #define GPERF_HASHES_HPP 8 | 9 | #include 10 | 11 | /** 12 | * @brief Calculates the hash of a URL complex string. 13 | * 14 | * @param str The string to hash. 15 | * @param len The length of the string. 16 | * @return The hash of the string. 17 | */ 18 | unsigned int 19 | GperfUrlComplexHash (const char *str, size_t len); 20 | 21 | /** 22 | * @brief Calculates the hash of a URL string. 23 | * 24 | * @param str The string to hash. 25 | * @param len The length of the string. 26 | * @return The hash of the string. 27 | */ 28 | unsigned int 29 | GperfUrlHash (const char *str, size_t len); 30 | 31 | /** 32 | * @brief Calculates the hash of a MAC address string. 33 | * 34 | * @param str The string to hash. 35 | * @param len The length of the string. 36 | * @return The hash of the string. 37 | */ 38 | unsigned int 39 | GperfMACHash (const char *str, size_t len); 40 | 41 | /** 42 | * @brief Calculates the hash of a CPF number string. 43 | * 44 | * @param str The string to hash. 45 | * @param len The length of the string. 46 | * @return The hash of the string. 47 | */ 48 | unsigned int 49 | GperfCPFHash (const char *str, size_t len); 50 | 51 | /** 52 | * @brief Calculates the hash of a SSN number string. 53 | * 54 | * @param str The string to hash. 55 | * @param len The length of the string. 56 | * @return The hash of the string. 57 | */ 58 | unsigned int 59 | GperfSSNHash (const char *str, size_t len); 60 | 61 | /** 62 | * @brief Calculates the hash of an IPV4 address string. 
63 | * 64 | * @param str The string to hash. 65 | * @param len The length of the string. 66 | * @return The hash of the string. 67 | */ 68 | unsigned int 69 | GperfIPV4Hash (const char *str, size_t len); 70 | 71 | /** 72 | * @brief Calculates the hash of an IPV6 address string. 73 | * 74 | * @param str The string to hash. 75 | * @param len The length of the string. 76 | * @return The hash of the string. 77 | */ 78 | unsigned int 79 | GperfIPV6Hash (const char *str, size_t len); 80 | 81 | /** 82 | * @brief Calculates the hash of an INTS string. 83 | * 84 | * @param str The string to hash. 85 | * @param len The length of the string. 86 | * @return The hash of the string. 87 | */ 88 | unsigned int 89 | GperfINTSHash (const char *str, size_t len); 90 | 91 | #endif -------------------------------------------------------------------------------- /results/gpt-prompts/url_complex.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | User 4 | For a hash function, assume that keys are URL, always in the format 'http:/google\.github\.io/[a-z]{10}/version[0-9]{3}/doxygen/html/[a-z0-9]{20}\.html', where all characters are alphanumerical. All characters outside the [a-z]{10}, [0-9]{3}, and [a-z0-9]{20} ranges are constant, so you can ignore them in your hash function. The fixed key size is 82 characters. 5 | 6 | The code is C++, and the keys are std::strings. Do not use std::hash. Assume you do not need to assert key format. 7 | 8 | Produce an optimized hash function for this specific case with an unrolled for loop, and also consider that the constant character is always the same and in the same position. 9 | ChatGPT 10 | Certainly! 
Here's an optimized hash function for your specific case in C++ with an unrolled loop: 11 | 12 | cpp 13 | Copy code 14 | #include 15 | 16 | uint64_t customHash(const std::string& key) { 17 | // Constants 18 | const char constantChar = '/'; 19 | 20 | // Unrolled loop for hash calculation 21 | uint64_t hash = 0; 22 | 23 | // Process [a-z]{10} 24 | hash ^= (key[7] << 56) | (key[8] << 48) | (key[9] << 40) | (key[10] << 32) | 25 | (key[11] << 24) | (key[12] << 16) | (key[13] << 8) | key[14]; 26 | 27 | // Process [0-9]{3} 28 | hash ^= (key[27] << 56) | (key[28] << 48) | (key[29] << 40); 29 | 30 | // Process [a-z0-9]{20} 31 | hash ^= (key[46] << 56) | (key[47] << 48) | (key[48] << 40) | (key[49] << 32) | 32 | (key[50] << 24) | (key[51] << 16) | (key[52] << 8) | key[53]; 33 | 34 | hash ^= (key[54] << 56) | (key[55] << 48) | (key[56] << 40) | (key[57] << 32) | 35 | (key[58] << 24) | (key[59] << 16) | (key[60] << 8) | key[61]; 36 | 37 | hash ^= (key[62] << 56) | (key[63] << 48) | (key[64] << 40) | (key[65] << 32) | 38 | (key[66] << 24) | (key[67] << 16) | (key[68] << 8) | key[69]; 39 | 40 | hash ^= (key[70] << 56) | (key[71] << 48) | (key[72] << 40) | (key[73] << 32) | 41 | (key[74] << 24) | (key[75] << 16) | (key[76] << 8) | key[77]; 42 | 43 | hash ^= (key[78] << 56) | (key[79] << 48) | (key[80] << 40) | (key[81] << 32); 44 | 45 | return hash; 46 | } 47 | This code assumes that the constant character is always the same ('/') and in the same position. It unrolls the loop manually, taking advantage of the fixed key size and structure. Note that this is a simple example, and depending on your specific use case, you might need to consider more sophisticated hash functions for better distribution properties. 
48 | -------------------------------------------------------------------------------- /results/gpt-prompts/ints.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | User 4 | 5 | For a hash function, assume that keys are INTEGERS with a fixed key size of 100 characters. 6 | 7 | The code is C++, and the keys are std::strings. Do not use std::hash. Assume you do not need to assert key format. 8 | 9 | Produce an optimized hash function for this specific case with an unrolled for loop, and also consider that the constant character is always the same and in the same position. 10 | ChatGPT 11 | Certainly! For a hash function that takes integer keys with a fixed size of 100 characters, and the keys are represented as std::strings in C++, you can create a custom hash function. In this example, I'll assume that the constant character is always at position 42 in the string. Below is a simple example of an optimized hash function with an unrolled for loop: 12 | 13 | cpp 14 | Copy code 15 | #include <string> 16 | 17 | std::size_t customHash(const std::string& key) { 18 | constexpr std::size_t keySize = 100; 19 | constexpr std::size_t unrollFactor = 10; // Unroll the loop by a factor of 10 for optimization 20 | 21 | std::size_t hashValue = 0; 22 | 23 | // Unrolled loop for better performance 24 | for (std::size_t i = 0; i < keySize; i += unrollFactor) { 25 | hashValue ^= (static_cast<std::size_t>(key[i]) << 7) ^ (static_cast<std::size_t>(key[i + 1]) << 14) 26 | ^ (static_cast<std::size_t>(key[i + 2]) << 21) ^ (static_cast<std::size_t>(key[i + 3]) << 28) 27 | ^ (static_cast<std::size_t>(key[i + 4]) << 35) ^ (static_cast<std::size_t>(key[i + 5]) << 42) 28 | ^ (static_cast<std::size_t>(key[i + 6]) << 49) ^ (static_cast<std::size_t>(key[i + 7]) << 56) 29 | ^ (static_cast<std::size_t>(key[i + 8]) << 63) ^ (static_cast<std::size_t>(key[i + 9]) << 70); 30 | } 31 | 32 | // Handle the remaining characters in the key (if any) 33 | for (std::size_t i = (keySize / unrollFactor) * unrollFactor; i < keySize; ++i) { 34 | hashValue ^= static_cast<std::size_t>(key[i]) << ((i % unrollFactor) * 7); 35
| } 36 | 37 | return hashValue; 38 | } 39 | 40 | int main() { 41 | std::string key = "0123456789"; // Replace with your actual key 42 | 43 | std::size_t hashValue = customHash(key); 44 | 45 | // Use hashValue as needed 46 | return 0; 47 | } 48 | This example unrolls the loop by a factor of 10, and the XOR operations help combine the hash values of individual characters. You can adjust the unroll factor based on your specific requirements and experiment to find the optimal value for your use case. 49 | -------------------------------------------------------------------------------- /results/gpt-prompts/ipv6.txt: -------------------------------------------------------------------------------- 1 | For a hash function, assume that keys are IPV6, always in the format '2001:0db8:85a3:0000:0000:8a2e:0370:7334', where all characters are hexadecimal. The ':' character is a constant, so you can ignore it in your hash function. The fixed key size is 39 characters. 2 | 3 | The code is C++, and the keys are std::strings. Do not use std::hash. Assume you do not need to assert key format. 4 | 5 | Produce an optimized hash function for this specific case with an unrolled for loop, and also consider that the constant character is always the same and in the same position. 6 | ChatGPT 7 | Certainly! 
Here's an optimized hash function in C++ for the specified case using an unrolled loop: 8 | 9 | cpp 10 | Copy code 11 | #include 12 | 13 | // Hash function for IPv6 keys 14 | std::size_t hashIPv6(const std::string& key) { 15 | // Assuming key is always in the format '2001:0db8:85a3:0000:0000:8a2e:0370:7334' 16 | // and ':' is ignored in the hash computation 17 | 18 | // Unrolled loop for optimized performance 19 | std::size_t hashValue = 0; 20 | 21 | // Process each segment (assuming 8 segments separated by ':') 22 | hashValue ^= (std::size_t(key[0]) << 24) | (std::size_t(key[1]) << 16) | (std::size_t(key[2]) << 8) | std::size_t(key[3]); 23 | hashValue ^= (std::size_t(key[5]) << 24) | (std::size_t(key[6]) << 16) | (std::size_t(key[7]) << 8) | std::size_t(key[8]); 24 | hashValue ^= (std::size_t(key[10]) << 24) | (std::size_t(key[11]) << 16) | (std::size_t(key[12]) << 8) | std::size_t(key[13]); 25 | hashValue ^= (std::size_t(key[15]) << 24) | (std::size_t(key[16]) << 16) | (std::size_t(key[17]) << 8) | std::size_t(key[18]); 26 | hashValue ^= (std::size_t(key[20]) << 24) | (std::size_t(key[21]) << 16) | (std::size_t(key[22]) << 8) | std::size_t(key[23]); 27 | hashValue ^= (std::size_t(key[25]) << 24) | (std::size_t(key[26]) << 16) | (std::size_t(key[27]) << 8) | std::size_t(key[28]); 28 | hashValue ^= (std::size_t(key[30]) << 24) | (std::size_t(key[31]) << 16) | (std::size_t(key[32]) << 8) | std::size_t(key[33]); 29 | hashValue ^= (std::size_t(key[35]) << 24) | (std::size_t(key[36]) << 16) | (std::size_t(key[37]) << 8) | std::size_t(key[38]); 30 | 31 | return hashValue; 32 | } 33 | 34 | int main() { 35 | std::string ipv6Key = "2001:0db8:85a3:0000:0000:8a2e:0370:7334"; 36 | std::size_t hashValue = hashIPv6(ipv6Key); 37 | // Use hashValue as needed 38 | return 0; 39 | } 40 | This code unrolls the loop to process each segment of the IPv6 key. It XORs the values of each segment, assuming that the ':' character is ignored in the hash computation. 
This unrolling can lead to better performance, especially when dealing with a fixed-size input like an IPv6 key. 41 | 42 | -------------------------------------------------------------------------------- /scripts/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | if [ "$(basename "$(pwd)")" = "scripts" ]; then 6 | cd .. 7 | fi 8 | 9 | if [ ! -d output ]; then 10 | mkdir output 11 | fi 12 | 13 | make -j"$(nproc)" 14 | make -j"$(nproc)" benchmark 15 | 16 | DISTRIBUTIONS="normal uniform" 17 | NUM_OPS=10000 18 | NUM_KEYS="500 2000 10000" 19 | REGEXES="$(sed -n 's/^\[\(.*\)\]/\1/p' Regexes.toml)" 20 | REPETITIONS=10 21 | PERCENTAGES=" 22 | 0.7 0.2 0.1 23 | 0.6 0.2 0.2 24 | 0.4 0.3 0.3 25 | " 26 | PERCENTAGES_COUNT="$(echo "$PERCENTAGES" | wc -w)" 27 | 28 | for REGEX in $REGEXES; do 29 | COUNT=0 30 | for DISTRIBUTION in $DISTRIBUTIONS; do 31 | for NUM_OP in $NUM_OPS; do 32 | for NUM_KEY in $NUM_KEYS; do 33 | for ARG in $(seq 1 3 "$PERCENTAGES_COUNT"); do 34 | 35 | INSERTION="$( echo "$PERCENTAGES" | tr '\n' ' ' | awk "{print \$$((ARG + 0))}")" 36 | SEARCH="$( echo "$PERCENTAGES" | tr '\n' ' ' | awk "{print \$$((ARG + 1))}")" 37 | ELIMINATION="$(echo "$PERCENTAGES" | tr '\n' ' ' | awk "{print \$$((ARG + 2))}")" 38 | 39 | ./bin/sepe-runner \ 40 | --verbose \ 41 | --distribution "$DISTRIBUTION" \ 42 | --operations "$NUM_OP" \ 43 | --keys "$NUM_KEY" \ 44 | --insert "$INSERTION" \ 45 | --search "$SEARCH" \ 46 | --elimination "$ELIMINATION" \ 47 | --repetitions $REPETITIONS \ 48 | --outfile "${COUNT}.csv" \ 49 | "$REGEX" 50 | 51 | COUNT=$((COUNT + 1)) 52 | done 53 | done 54 | done 55 | done 56 | done 57 | 58 | mv -v ./*.csv output/ 59 | zip -9 -o RQ1_RQ2.zip -r output/* 60 | 61 | for REGEX in $REGEXES; do 62 | COUNT=0 63 | for DISTRIBUTION in $DISTRIBUTIONS; do 64 | for NUM_OP in $NUM_OPS; do 65 | for NUM_KEY in $NUM_KEYS; do 66 | for ARG in $(seq 1 3 "$PERCENTAGES_COUNT"); do 67 | 68 | 
INSERTION="$( echo "$PERCENTAGES" | tr '\n' ' ' | awk "{print \$$((ARG + 0))}")" 69 | SEARCH="$( echo "$PERCENTAGES" | tr '\n' ' ' | awk "{print \$$((ARG + 1))}")" 70 | ELIMINATION="$(echo "$PERCENTAGES" | tr '\n' ' ' | awk "{print \$$((ARG + 2))}")" 71 | 72 | ./bin/sepe-runner \ 73 | --verbose \ 74 | --hash-performance \ 75 | --distribution "$DISTRIBUTION" \ 76 | --operations "$NUM_OP" \ 77 | --keys "$NUM_KEY" \ 78 | --insert "$INSERTION" \ 79 | --search "$SEARCH" \ 80 | --elimination "$ELIMINATION" \ 81 | --repetitions $REPETITIONS \ 82 | --outfile "${COUNT}.csv" \ 83 | "$REGEX" 84 | 85 | COUNT=$((COUNT + 1)) 86 | done 87 | done 88 | done 89 | done 90 | done 91 | 92 | mv -v ./*.csv output/ 93 | zip -9 -o RQ1_RQ2_hash-performance.zip -r output/* -------------------------------------------------------------------------------- /results/gpt-prompts/mac.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | User 4 | For a hash function, assume that keys are MAC, always in the format 'XX:XX:XX:XX:XX:XX', where all characters are hexadecimal. The ':' character is constant, so you can ignore them in your hash function. The fixed key size is 17 characters. 5 | 6 | The code is C++, and the keys are std::strings. Do not use std::hash. Assume you do not need to assert key format. 7 | 8 | Produce an optimized hash function for this specific case with an unrolled for loop, and also consider that the constant character is always the same and in the same position. 9 | ChatGPT 10 | Certainly! 
Here's an example of an optimized hash function in C++ for the given scenario using an unrolled loop: 11 | 12 | cpp 13 | Copy code 14 | #include <string> 15 | 16 | std::size_t customHash(const std::string& key) { 17 | // Assuming the key is always in the format 'XX:XX:XX:XX:XX:XX' 18 | // where the ':' character is at positions 2, 5, 8, 11, 14 19 | // and ignoring those positions while hashing 20 | 21 | const char constantChar = ':'; // The constant character 22 | 23 | // Unrolled loop for hash calculation 24 | std::size_t hashValue = 0; 25 | 26 | hashValue ^= static_cast<std::size_t>(key[0]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2); 27 | hashValue ^= static_cast<std::size_t>(key[1]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2); 28 | 29 | hashValue ^= static_cast<std::size_t>(key[3]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2); 30 | hashValue ^= static_cast<std::size_t>(key[4]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2); 31 | 32 | hashValue ^= static_cast<std::size_t>(key[6]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2); 33 | hashValue ^= static_cast<std::size_t>(key[7]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2); 34 | 35 | hashValue ^= static_cast<std::size_t>(key[9]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2); 36 | hashValue ^= static_cast<std::size_t>(key[10]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2); 37 | 38 | hashValue ^= static_cast<std::size_t>(key[12]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2); 39 | hashValue ^= static_cast<std::size_t>(key[13]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2); 40 | 41 | hashValue ^= static_cast<std::size_t>(key[15]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2); 42 | hashValue ^= static_cast<std::size_t>(key[16]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2); 43 | 44 | // Combine with constant character 45 | hashValue ^= static_cast<std::size_t>(constantChar) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2); 46 | 47 | return hashValue; 48 | } 49 | 50 | int main() { 51 | std::string key = "1A:2B:3C:4D:5E:6F"; 52 | std::size_t hashValue = customHash(key); 53 | // Use hashValue as needed 54 | return 0; 55 | } 56 |
This hash function uses the Jenkins hash algorithm, and it's unrolled to potentially improve performance. Note that this is just one example, and the actual performance gains may depend on the specific compiler and platform. Adjustments may be needed based on your specific requirements and testing. 57 | -------------------------------------------------------------------------------- /src/keyuser/src/registry.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file registry.hpp 3 | * @brief This file contains macros for registering benchmarks. 4 | * @brief New Custom Hash functions must be manually added to the REGISTER_ALL_BENCHMARKS macro. 5 | */ 6 | 7 | /** 8 | * @def DECLARE_ONE_BENCH(name, hashname) 9 | * @brief This macro creates a new benchmark object. 10 | * @param name The name of the benchmark. 11 | * @param hashname The name of the hash function used in the benchmark. 12 | * @return A pointer to the new benchmark object. 13 | */ 14 | #define DECLARE_ONE_BENCH(name, hashname) (Benchmark*)new name(#name,#hashname) 15 | 16 | /** 17 | * @def REGISTER_BENCHMARKS(hashname) 18 | * @brief This macro registers benchmarks for a specific hash function. 19 | * @param hashname The name of the hash function. 20 | */ 21 | #define REGISTER_BENCHMARKS(hashname) benchmarks.push_back(DECLARE_ONE_BENCH(UnorderedMapBench, hashname)); \ 22 | benchmarks.push_back(DECLARE_ONE_BENCH(UnorderedMultiMapBench, hashname)); \ 23 | benchmarks.push_back(DECLARE_ONE_BENCH(UnorderedSetBench, hashname)); \ 24 | benchmarks.push_back(DECLARE_ONE_BENCH(UnorderedMultisetBench, hashname)); 25 | 26 | /** 27 | * @def REGISTER_ALL_BENCHMARKS 28 | * @brief This macro registers all benchmarks for all hash functions. 
29 | */ 30 | #define REGISTER_ALL_BENCHMARKS \ 31 | REGISTER_BENCHMARKS(AbseilHash); \ 32 | REGISTER_BENCHMARKS(STDHashBin); \ 33 | REGISTER_BENCHMARKS(STDHashSrc); \ 34 | REGISTER_BENCHMARKS(FNVHash); \ 35 | REGISTER_BENCHMARKS(CityHash); \ 36 | REGISTER_BENCHMARKS(IPV4HashUnrolled); \ 37 | REGISTER_BENCHMARKS(IPV4HashMove); \ 38 | REGISTER_BENCHMARKS(IntSimdHash); \ 39 | REGISTER_BENCHMARKS(PextUrlComplex); \ 40 | REGISTER_BENCHMARKS(PextUrl); \ 41 | REGISTER_BENCHMARKS(PextMac); \ 42 | REGISTER_BENCHMARKS(PextCPF); \ 43 | REGISTER_BENCHMARKS(PextSSN); \ 44 | REGISTER_BENCHMARKS(PextIPV4); \ 45 | REGISTER_BENCHMARKS(PextIPV6); \ 46 | REGISTER_BENCHMARKS(PextINTS); \ 47 | REGISTER_BENCHMARKS(OffXorUrlComplex); \ 48 | REGISTER_BENCHMARKS(OffXorUrl); \ 49 | REGISTER_BENCHMARKS(OffXorMac); \ 50 | REGISTER_BENCHMARKS(OffXorCPF); \ 51 | REGISTER_BENCHMARKS(OffXorSSN); \ 52 | REGISTER_BENCHMARKS(OffXorIPV4); \ 53 | REGISTER_BENCHMARKS(OffXorIPV6); \ 54 | REGISTER_BENCHMARKS(OffXorINTS); \ 55 | REGISTER_BENCHMARKS(NaiveUrlComplex); \ 56 | REGISTER_BENCHMARKS(NaiveUrl); \ 57 | REGISTER_BENCHMARKS(NaiveMac); \ 58 | REGISTER_BENCHMARKS(NaiveCPF); \ 59 | REGISTER_BENCHMARKS(NaiveSSN); \ 60 | REGISTER_BENCHMARKS(NaiveIPV4); \ 61 | REGISTER_BENCHMARKS(NaiveIPV6); \ 62 | REGISTER_BENCHMARKS(NaiveINTS); \ 63 | REGISTER_BENCHMARKS(AesUrlComplex); \ 64 | REGISTER_BENCHMARKS(AesUrl); \ 65 | REGISTER_BENCHMARKS(AesMac); \ 66 | REGISTER_BENCHMARKS(AesCPF); \ 67 | REGISTER_BENCHMARKS(AesSSN); \ 68 | REGISTER_BENCHMARKS(AesIPV4); \ 69 | REGISTER_BENCHMARKS(AesIPV6); \ 70 | REGISTER_BENCHMARKS(AesINTS); \ 71 | REGISTER_BENCHMARKS(NaiveSimdUrlComplex); \ 72 | REGISTER_BENCHMARKS(NaiveSimdUrl); \ 73 | REGISTER_BENCHMARKS(NaiveSimdIPV6); \ 74 | REGISTER_BENCHMARKS(NaiveSimdINTS); \ 75 | REGISTER_BENCHMARKS(GptUrlComplex); \ 76 | REGISTER_BENCHMARKS(GptUrl); \ 77 | REGISTER_BENCHMARKS(GptMac); \ 78 | REGISTER_BENCHMARKS(GptCPF); \ 79 | REGISTER_BENCHMARKS(GptSSN); \ 80 | 
REGISTER_BENCHMARKS(GptIPV4); \ 81 | REGISTER_BENCHMARKS(GptIPV6); \ 82 | REGISTER_BENCHMARKS(GptINTS); \ 83 | REGISTER_BENCHMARKS(GperfUrlComplex); \ 84 | REGISTER_BENCHMARKS(GperfUrl); \ 85 | REGISTER_BENCHMARKS(GperfMac); \ 86 | REGISTER_BENCHMARKS(GperfCPF); \ 87 | REGISTER_BENCHMARKS(GperfSSN); \ 88 | REGISTER_BENCHMARKS(GperfIPV4); \ 89 | REGISTER_BENCHMARKS(GperfIPV6); \ 90 | REGISTER_BENCHMARKS(GperfINTS); \ 91 | REGISTER_BENCHMARKS(PextMurmurUrlComplex); \ 92 | REGISTER_BENCHMARKS(PextMurmurINTS); 93 | -------------------------------------------------------------------------------- /src/keyuser/src/gperf-hashes/url_fixed_pattern.c: -------------------------------------------------------------------------------- 1 | /* ANSI-C code produced by gperf version 3.1 */ 2 | /* Command-line: ./bin/gperf input.txt */ 3 | /* Computed positions: -k'46-47,60,62' */ 4 | 5 | #include "gperf-hashes.h" 6 | 7 | #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ 8 | && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ 9 | && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ 10 | && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ 11 | && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ 12 | && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ 13 | && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ 14 | && ('=' == 61) && ('>' == 62) && ('?' 
== 63) && ('A' == 65) \ 15 | && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ 16 | && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ 17 | && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ 18 | && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ 19 | && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ 20 | && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ 21 | && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ 22 | && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ 23 | && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ 24 | && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ 25 | && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ 26 | && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ 27 | && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ 28 | && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ 29 | && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) 30 | /* The character set is not based on ISO-646. */ 31 | #error "gperf generated tables don't work with this execution character set. Please report a bug to ." 
32 | #endif 33 | 34 | 35 | #define TOTAL_KEYWORDS 1000 36 | #define MIN_WORD_LENGTH 70 37 | #define MAX_WORD_LENGTH 70 38 | #define MIN_HASH_VALUE 20 39 | #define MAX_HASH_VALUE 7027 40 | /* maximum key range = 7008, duplicates = 0 */ 41 | 42 | #ifdef __GNUC__ 43 | __inline 44 | #else 45 | #ifdef __cplusplus 46 | inline 47 | #endif 48 | #endif 49 | /*ARGSUSED*/ 50 | unsigned int 51 | GperfUrlHash (const char *str, size_t len) 52 | { 53 | static unsigned short asso_values[] = 54 | { 55 | 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 56 | 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 57 | 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 58 | 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 59 | 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 464, 1954, 60 | 801, 1634, 576, 6, 30, 325, 110, 85, 1199, 1934, 61 | 793, 1864, 825, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 62 | 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 63 | 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 64 | 7028, 7028, 7028, 7028, 7028, 7028, 7028, 471, 488, 513, 65 | 1831, 292, 15, 1015, 506, 20, 435, 506, 55, 575, 66 | 900, 5, 10, 290, 5, 0, 60, 180, 80, 745, 67 | 136, 1, 365, 1799, 107, 2363, 936, 212, 7028, 7028, 68 | 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 69 | 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 70 | 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 71 | 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 72 | 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 73 | 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 74 | 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 75 | 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 76 | 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 77 | 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 78 | 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 79 | 
7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 80 | 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 81 | 7028 82 | }; 83 | return asso_values[(unsigned char)str[61]+5] + asso_values[(unsigned char)str[59]+4] + asso_values[(unsigned char)str[46]] + asso_values[(unsigned char)str[45]+1]; 84 | } 85 | 86 | -------------------------------------------------------------------------------- /src/keyuser/src/gperf-hashes/cpf.c: -------------------------------------------------------------------------------- 1 | /* ANSI-C code produced by gperf version 3.1 */ 2 | /* Command-line: ./bin/gperf */ 3 | /* Computed positions: -k'1,5,13-14' */ 4 | 5 | #include "gperf-hashes.h" 6 | 7 | #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ 8 | && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ 9 | && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ 10 | && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ 11 | && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ 12 | && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ 13 | && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ 14 | && ('=' == 61) && ('>' == 62) && ('?' 
== 63) && ('A' == 65) \ 15 | && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ 16 | && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ 17 | && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ 18 | && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ 19 | && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ 20 | && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ 21 | && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ 22 | && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ 23 | && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ 24 | && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ 25 | && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ 26 | && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ 27 | && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ 28 | && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ 29 | && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) 30 | /* The character set is not based on ISO-646. */ 31 | #error "gperf generated tables don't work with this execution character set. Please report a bug to ." 
32 | #endif 33 | 34 | 35 | #define TOTAL_KEYWORDS 1000 36 | #define MIN_WORD_LENGTH 17 37 | #define MAX_WORD_LENGTH 17 38 | #define MIN_HASH_VALUE 15 39 | #define MAX_HASH_VALUE 6416 40 | /* maximum key range = 6402, duplicates = 0 */ 41 | 42 | #ifdef __GNUC__ 43 | __inline 44 | #else 45 | #ifdef __cplusplus 46 | inline 47 | #endif 48 | #endif 49 | /*ARGSUSED*/ 50 | unsigned int 51 | GperfCPFHash (const char *str, size_t len) 52 | { 53 | static unsigned short asso_values[] = 54 | { 55 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 56 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 57 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 58 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 59 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 1494, 825, 60 | 41, 206, 865, 435, 0, 10, 60, 25, 475, 1694, 61 | 80, 405, 665, 45, 27, 300, 15, 5, 0, 30, 62 | 150, 175, 71, 986, 667, 1429, 1796, 766, 900, 371, 63 | 1934, 987, 1754, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 64 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 1439, 86, 1, 65 | 475, 205, 290, 950, 1886, 1986, 26, 2123, 923, 332, 66 | 1748, 96, 342, 1593, 857, 6417, 6417, 6417, 6417, 6417, 67 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 68 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 69 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 70 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 71 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 72 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 73 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 74 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 75 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 76 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 77 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 78 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 79 | 6417, 
6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 80 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 81 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417 82 | }; 83 | return asso_values[(unsigned char)str[13]+1] + asso_values[(unsigned char)str[12]] + asso_values[(unsigned char)str[4]+6] + asso_values[(unsigned char)str[0]+12]; 84 | } 85 | 86 | -------------------------------------------------------------------------------- /src/keyuser/src/gperf-hashes/mac.c: -------------------------------------------------------------------------------- 1 | /* ANSI-C code produced by gperf version 3.1 */ 2 | /* Command-line: ./bin/gperf */ 3 | /* Computed positions: -k'1,5,13-14' */ 4 | 5 | #include "gperf-hashes.h" 6 | 7 | #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ 8 | && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ 9 | && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ 10 | && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ 11 | && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ 12 | && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ 13 | && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ 14 | && ('=' == 61) && ('>' == 62) && ('?' 
== 63) && ('A' == 65) \ 15 | && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ 16 | && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ 17 | && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ 18 | && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ 19 | && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ 20 | && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ 21 | && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ 22 | && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ 23 | && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ 24 | && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ 25 | && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ 26 | && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ 27 | && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ 28 | && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ 29 | && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) 30 | /* The character set is not based on ISO-646. */ 31 | #error "gperf generated tables don't work with this execution character set. Please report a bug to ." 
32 | #endif 33 | 34 | 35 | #define TOTAL_KEYWORDS 1000 36 | #define MIN_WORD_LENGTH 17 37 | #define MAX_WORD_LENGTH 17 38 | #define MIN_HASH_VALUE 15 39 | #define MAX_HASH_VALUE 6416 40 | /* maximum key range = 6402, duplicates = 0 */ 41 | 42 | #ifdef __GNUC__ 43 | __inline 44 | #else 45 | #ifdef __cplusplus 46 | inline 47 | #endif 48 | #endif 49 | /*ARGSUSED*/ 50 | unsigned int 51 | GperfMACHash (const char *str, size_t len) 52 | { 53 | static unsigned short asso_values[] = 54 | { 55 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 56 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 57 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 58 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 59 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 1494, 825, 60 | 41, 206, 865, 435, 0, 10, 60, 25, 475, 1694, 61 | 80, 405, 665, 45, 27, 300, 15, 5, 0, 30, 62 | 150, 175, 71, 986, 667, 1429, 1796, 766, 900, 371, 63 | 1934, 987, 1754, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 64 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 1439, 86, 1, 65 | 475, 205, 290, 950, 1886, 1986, 26, 2123, 923, 332, 66 | 1748, 96, 342, 1593, 857, 6417, 6417, 6417, 6417, 6417, 67 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 68 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 69 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 70 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 71 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 72 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 73 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 74 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 75 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 76 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 77 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 78 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 79 | 6417, 
6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 80 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 81 | 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417 82 | }; 83 | return asso_values[(unsigned char)str[13]+1] + asso_values[(unsigned char)str[12]] + asso_values[(unsigned char)str[4]+6] + asso_values[(unsigned char)str[0]+12]; 84 | } 85 | 86 | -------------------------------------------------------------------------------- /src/keyuser/src/gperf-hashes/ipv6.c: -------------------------------------------------------------------------------- 1 | /* ANSI-C code produced by gperf version 3.1 */ 2 | /* Command-line: ./bin/gperf */ 3 | /* Computed positions: -k'1,4,9,28' */ 4 | 5 | #include "gperf-hashes.h" 6 | 7 | #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ 8 | && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ 9 | && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ 10 | && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ 11 | && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ 12 | && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ 13 | && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ 14 | && ('=' == 61) && ('>' == 62) && ('?' 
== 63) && ('A' == 65) \ 15 | && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ 16 | && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ 17 | && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ 18 | && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ 19 | && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ 20 | && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ 21 | && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ 22 | && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ 23 | && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ 24 | && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ 25 | && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ 26 | && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ 27 | && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ 28 | && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ 29 | && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) 30 | /* The character set is not based on ISO-646. */ 31 | #error "gperf generated tables don't work with this execution character set. Please report a bug to ." 
32 | #endif 33 | 34 | 35 | #define TOTAL_KEYWORDS 1000 36 | #define MIN_WORD_LENGTH 39 37 | #define MAX_WORD_LENGTH 39 38 | #define MIN_HASH_VALUE 10 39 | #define MAX_HASH_VALUE 5736 40 | /* maximum key range = 5727, duplicates = 0 */ 41 | 42 | #ifdef __GNUC__ 43 | __inline 44 | #else 45 | #ifdef __cplusplus 46 | inline 47 | #endif 48 | #endif 49 | /*ARGSUSED*/ 50 | unsigned int 51 | GperfIPV6Hash (const char *str, size_t len) 52 | { 53 | static unsigned short asso_values[] = 54 | { 55 | 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 56 | 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 57 | 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 58 | 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 59 | 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 1439, 12, 60 | 136, 132, 835, 1314, 190, 20, 55, 230, 1000, 430, 61 | 615, 10, 5, 165, 60, 0, 265, 25, 0, 30, 62 | 15, 450, 1023, 995, 751, 960, 230, 1969, 96, 510, 63 | 200, 188, 2048, 86, 46, 5737, 5737, 5737, 5737, 5737, 64 | 5737, 5737, 5737, 5737, 5737, 5737, 5737, 920, 882, 937, 65 | 1826, 244, 501, 1834, 1746, 557, 1579, 115, 1, 721, 66 | 961, 5, 700, 57, 1506, 890, 362, 5737, 5737, 5737, 67 | 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 68 | 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 69 | 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 70 | 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 71 | 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 72 | 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 73 | 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 74 | 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 75 | 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 76 | 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 77 | 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 78 | 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 79 | 5737, 5737, 
5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 80 | 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 81 | 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737 82 | }; 83 | return asso_values[(unsigned char)str[27]+10] + asso_values[(unsigned char)str[8]+6] + asso_values[(unsigned char)str[3]] + asso_values[(unsigned char)str[0]+14]; 84 | } 85 | 86 | -------------------------------------------------------------------------------- /src/keyuser/src/gperf-hashes/url_complex.c: -------------------------------------------------------------------------------- 1 | /* ANSI-C code produced by gperf version 3.1 */ 2 | /* Command-line: ./bin/gperf input.txt */ 3 | /* Computed positions: -k'24,59-61' */ 4 | 5 | #include "gperf-hashes.h" 6 | 7 | #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ 8 | && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ 9 | && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ 10 | && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ 11 | && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ 12 | && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ 13 | && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ 14 | && ('=' == 61) && ('>' == 62) && ('?' 
== 63) && ('A' == 65) \ 15 | && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ 16 | && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ 17 | && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ 18 | && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ 19 | && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ 20 | && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ 21 | && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ 22 | && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ 23 | && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ 24 | && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ 25 | && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ 26 | && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ 27 | && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ 28 | && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ 29 | && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) 30 | /* The character set is not based on ISO-646. */ 31 | #error "gperf generated tables don't work with this execution character set. Please report a bug to ." 
32 | #endif 33 | 34 | 35 | #define TOTAL_KEYWORDS 1000 36 | #define MIN_WORD_LENGTH 83 37 | #define MAX_WORD_LENGTH 83 38 | #define MIN_HASH_VALUE 25 39 | #define MAX_HASH_VALUE 4734 40 | /* maximum key range = 4710, duplicates = 0 */ 41 | 42 | #ifdef __GNUC__ 43 | __inline 44 | #else 45 | #ifdef __cplusplus 46 | inline 47 | #endif 48 | #endif 49 | /*ARGSUSED*/ 50 | unsigned int 51 | GperfUrlComplexHash (const char *str, size_t len) 52 | { 53 | static unsigned short asso_values[] = 54 | { 55 | 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 56 | 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 57 | 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 58 | 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 59 | 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 909, 17, 60 | 955, 652, 17, 146, 181, 386, 726, 267, 4735, 67, 61 | 222, 1016, 42, 1449, 991, 1499, 591, 1699, 2044, 4735, 62 | 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 63 | 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 64 | 4735, 4735, 4735, 4735, 4735, 4735, 4735, 121, 575, 730, 65 | 880, 985, 16, 315, 111, 245, 930, 1, 180, 30, 66 | 60, 45, 0, 460, 215, 40, 15, 5, 25, 0, 67 | 125, 535, 5, 1018, 1476, 1854, 1869, 937, 1394, 1609, 68 | 987, 867, 1789, 51, 4735, 4735, 4735, 4735, 4735, 4735, 69 | 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 70 | 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 71 | 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 72 | 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 73 | 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 74 | 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 75 | 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 76 | 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 77 | 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 78 | 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 79 | 4735, 
4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 80 | 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 81 | 4735, 4735, 4735, 4735, 4735, 4735, 4735 82 | }; 83 | return asso_values[(unsigned char)str[60]+11] + asso_values[(unsigned char)str[59]] + asso_values[(unsigned char)str[58]] + asso_values[(unsigned char)str[23]]; 84 | } 85 | 86 | -------------------------------------------------------------------------------- /src/keyuser/src/gperf-hashes/ipv4.c: -------------------------------------------------------------------------------- 1 | /* ANSI-C code produced by gperf version 3.1 */ 2 | /* Command-line: ./bin/gperf */ 3 | /* Computed positions: -k'8,14,22' */ 4 | 5 | #include "gperf-hashes.h" 6 | 7 | #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ 8 | && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ 9 | && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ 10 | && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ 11 | && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ 12 | && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ 13 | && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ 14 | && ('=' == 61) && ('>' == 62) && ('?' 
== 63) && ('A' == 65) \ 15 | && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ 16 | && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ 17 | && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ 18 | && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ 19 | && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ 20 | && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ 21 | && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ 22 | && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ 23 | && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ 24 | && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ 25 | && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ 26 | && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ 27 | && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ 28 | && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ 29 | && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) 30 | /* The character set is not based on ISO-646. */ 31 | #error "gperf generated tables don't work with this execution character set. Please report a bug to ." 
32 | #endif 33 | 34 | 35 | #define TOTAL_KEYWORDS 1000 36 | #define MIN_WORD_LENGTH 25 37 | #define MAX_WORD_LENGTH 27 38 | #define MIN_HASH_VALUE 27 39 | #define MAX_HASH_VALUE 3126 40 | /* maximum key range = 3100, duplicates = 0 */ 41 | 42 | #ifdef __GNUC__ 43 | __inline 44 | #else 45 | #ifdef __cplusplus 46 | inline 47 | #endif 48 | #endif 49 | 50 | unsigned int 51 | GperfIPV4Hash (const char *str, size_t len) 52 | { 53 | static unsigned short asso_values[] = 54 | { 55 | 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 56 | 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 57 | 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 58 | 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 59 | 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 60 | 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 61 | 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 62 | 3127, 464, 75, 0, 942, 55, 20, 350, 100, 820, 63 | 395, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 64 | 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 65 | 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 66 | 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 67 | 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 940, 862, 68 | 1000, 126, 11, 376, 700, 281, 162, 860, 181, 980, 69 | 22, 147, 416, 561, 16, 951, 516, 171, 567, 26, 70 | 781, 315, 70, 15, 6, 751, 0, 5, 0, 211, 71 | 80, 370, 115, 0, 240, 795, 465, 95, 40, 20, 72 | 285, 185, 630, 620, 570, 10, 1015, 545, 585, 165, 73 | 30, 196, 305, 510, 560, 80, 345, 25, 70, 850, 74 | 15, 135, 636, 772, 741, 92, 977, 420, 1669, 151, 75 | 42, 803, 662, 986, 58, 588, 792, 1444, 1074, 172, 76 | 102, 360, 72, 672, 846, 3127, 3127, 3127, 3127, 3127, 77 | 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 78 | 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 79 | 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 80 | 3127, 3127, 3127, 
3127, 3127, 3127, 3127, 3127, 3127, 3127, 81 | 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 82 | 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127 83 | }; 84 | return len + asso_values[(unsigned char)str[21]] + asso_values[(unsigned char)str[13]] + asso_values[(unsigned char)str[7]+23]; 85 | } 86 | 87 | -------------------------------------------------------------------------------- /src/keyuser/src/gperf-hashes/ssn.c: -------------------------------------------------------------------------------- 1 | /* ANSI-C code produced by gperf version 3.1 */ 2 | /* Command-line: ./bin/gperf */ 3 | /* Computed positions: -k'1-3,5-6,10' */ 4 | 5 | #include "gperf-hashes.h" 6 | 7 | #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ 8 | && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ 9 | && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ 10 | && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ 11 | && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ 12 | && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ 13 | && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ 14 | && ('=' == 61) && ('>' == 62) && ('?' 
== 63) && ('A' == 65) \ 15 | && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ 16 | && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ 17 | && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ 18 | && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ 19 | && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ 20 | && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ 21 | && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ 22 | && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ 23 | && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ 24 | && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ 25 | && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ 26 | && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ 27 | && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ 28 | && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ 29 | && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) 30 | /* The character set is not based on ISO-646. */ 31 | #error "gperf generated tables don't work with this execution character set. Please report a bug to ." 
32 | #endif 33 | 34 | 35 | #define TOTAL_KEYWORDS 1000 36 | #define MIN_WORD_LENGTH 11 37 | #define MAX_WORD_LENGTH 11 38 | #define MIN_HASH_VALUE 100 39 | #define MAX_HASH_VALUE 9570 40 | /* maximum key range = 9471, duplicates = 0 */ 41 | 42 | #ifdef __GNUC__ 43 | __inline 44 | #else 45 | #ifdef __cplusplus 46 | inline 47 | #endif 48 | #endif 49 | /*ARGSUSED*/ 50 | unsigned int 51 | GperfSSNHash (const char *str, size_t len) 52 | { 53 | static unsigned short asso_values[] = 54 | { 55 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 56 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 57 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 58 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 59 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 3048, 933, 60 | 1999, 17, 80, 680, 1005, 370, 20, 35, 85, 185, 61 | 175, 35, 1889, 5, 15, 0, 1, 461, 847, 2123, 62 | 3418, 3667, 4346, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 63 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 64 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 65 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 66 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 67 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 68 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 69 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 70 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 71 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 72 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 73 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 74 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 75 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 76 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 77 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 78 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 
9571, 9571, 9571, 79 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 80 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 81 | 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 82 | 9571 83 | }; 84 | return asso_values[(unsigned char)str[9]+1] + asso_values[(unsigned char)str[5]+4] + asso_values[(unsigned char)str[4]] + asso_values[(unsigned char)str[2]+11] + asso_values[(unsigned char)str[1]+8] + asso_values[(unsigned char)str[0]+15]; 85 | } 86 | 87 | -------------------------------------------------------------------------------- /src/keyuser/src/gperf-hashes/ints.c: -------------------------------------------------------------------------------- 1 | /* ANSI-C code produced by gperf version 3.1 */ 2 | /* Command-line: ./bin/gperf */ 3 | /* Computed positions: -k'1-2,18,23,51' */ 4 | 5 | #include "gperf-hashes.h" 6 | 7 | #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ 8 | && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ 9 | && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ 10 | && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ 11 | && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ 12 | && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ 13 | && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ 14 | && ('=' == 61) && ('>' == 62) && ('?' 
== 63) && ('A' == 65) \ 15 | && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ 16 | && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ 17 | && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ 18 | && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ 19 | && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ 20 | && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ 21 | && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ 22 | && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ 23 | && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ 24 | && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ 25 | && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ 26 | && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ 27 | && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ 28 | && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ 29 | && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) 30 | /* The character set is not based on ISO-646. */ 31 | #error "gperf generated tables don't work with this execution character set. Please report a bug to ." 
32 | #endif 33 | 34 | 35 | #define TOTAL_KEYWORDS 1000 36 | #define MIN_WORD_LENGTH 100 37 | #define MAX_WORD_LENGTH 100 38 | #define MIN_HASH_VALUE 34 39 | #define MAX_HASH_VALUE 9933 40 | /* maximum key range = 9900, duplicates = 0 */ 41 | 42 | #ifdef __GNUC__ 43 | __inline 44 | #else 45 | #ifdef __cplusplus 46 | inline 47 | #endif 48 | #endif 49 | /*ARGSUSED*/ 50 | unsigned int 51 | GperfINTSHash (const char *str, size_t len) 52 | { 53 | static unsigned short asso_values[] = 54 | { 55 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 56 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 57 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 58 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 59 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 1329, 60 | 728, 1983, 1, 0, 115, 65, 400, 995, 55, 540, 61 | 240, 25, 886, 2868, 15, 160, 435, 5, 872, 2001, 62 | 3678, 0, 10, 40, 962, 2588, 1429, 2, 36, 3217, 63 | 1010, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 64 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 65 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 66 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 67 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 68 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 69 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 70 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 71 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 72 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 73 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 74 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 75 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 76 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 77 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 78 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 
9934, 9934, 79 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 80 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 81 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 82 | 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934 83 | }; 84 | return asso_values[(unsigned char)str[50]+1] + asso_values[(unsigned char)str[22]+10] + asso_values[(unsigned char)str[17]+23] + asso_values[(unsigned char)str[1]+4] + asso_values[(unsigned char)str[0]+16]; 85 | } 86 | 87 | -------------------------------------------------------------------------------- /src/keyuser/src/google-hashes/city.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 Google, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 
20 | // 21 | // CityHash, by Geoff Pike and Jyrki Alakuijala 22 | // 23 | // http://code.google.com/p/cityhash/ 24 | // 25 | // This file provides a few functions for hashing strings. All of them are 26 | // high-quality functions in the sense that they pass standard tests such 27 | // as Austin Appleby's SMHasher. They are also fast. 28 | // 29 | // For 64-bit x86 code, on short strings, we don't know of anything faster than 30 | // CityHash64 that is of comparable quality. We believe our nearest competitor 31 | // is Murmur3. For 64-bit x86 code, CityHash64 is an excellent choice for hash 32 | // tables and most other hashing (excluding cryptography). 33 | // 34 | // For 64-bit x86 code, on long strings, the picture is more complicated. 35 | // On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc., 36 | // CityHashCrc128 appears to be faster than all competitors of comparable 37 | // quality. CityHash128 is also good but not quite as fast. We believe our 38 | // nearest competitor is Bob Jenkins' Spooky. We don't have great data for 39 | // other 64-bit CPUs, but for long strings we know that Spooky is slightly 40 | // faster than CityHash on some relatively recent AMD x86-64 CPUs, for example. 41 | // Note that CityHashCrc128 is declared in citycrc.h. 42 | // 43 | // For 32-bit x86 code, we don't know of anything faster than CityHash32 that 44 | // is of comparable quality. We believe our nearest competitor is Murmur3A. 45 | // (On 64-bit CPUs, it is typically faster to use the other CityHash variants.) 46 | // 47 | // Functions in the CityHash family are not suitable for cryptography. 48 | // 49 | // Please see CityHash's README file for more details on our performance 50 | // measurements and so on. 51 | // 52 | // WARNING: This code has been only lightly tested on big-endian platforms! 
// It is known to work well on little-endian platforms that have a small penalty
// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
// It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
// bug reports are welcome.
//
// By the way, for some hash functions, given strings a and b, the hash
// of a+b is easily derived from the hashes of a and b. This property
// doesn't hold for any hash functions in this file.

#ifndef CITY_HASH_H_
#define CITY_HASH_H_

#include <stdlib.h>  // for size_t.
#include <stdint.h>
#include <utility>

typedef uint8_t uint8;
typedef uint32_t uint32;
typedef uint64_t uint64;
// A 128-bit value stored as (low 64 bits, high 64 bits).
typedef std::pair<uint64, uint64> uint128;

// Accessors for the two halves of a uint128.
inline uint64 Uint128Low64(const uint128& x) { return x.first; }
inline uint64 Uint128High64(const uint128& x) { return x.second; }

// Hash function for a byte array.
uint64 CityHash64(const char *buf, size_t len);

// Hash function for a byte array. For convenience, a 64-bit seed is also
// hashed into the result.
uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);

// Hash function for a byte array. For convenience, two seeds are also
// hashed into the result.
uint64 CityHash64WithSeeds(const char *buf, size_t len,
                           uint64 seed0, uint64 seed1);

// Hash function for a byte array.
uint128 CityHash128(const char *s, size_t len);

// Hash function for a byte array. For convenience, a 128-bit seed is also
// hashed into the result.
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);

// Hash function for a byte array. Most useful in 32-bit binaries.
uint32 CityHash32(const char *buf, size_t len);

// Hash 128 input bits down to 64 bits of output.
// This is intended to be a reasonably good hash function.
//
// The two halves are XOR-ed and multiplied by kMul, then the high half is
// mixed in again; each step is followed by a 47-bit right-shift XOR. All
// arithmetic is unsigned and wraps modulo 2^64.
inline uint64 Hash128to64(const uint128& x) {
  // Murmur-inspired hashing.
  const uint64 kMul = 0x9ddfea08eb382d69ULL;
  uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
  a ^= (a >> 47);
  uint64 b = (Uint128High64(x) ^ a) * kMul;
  b ^= (b >> 47);
  b *= kMul;
  return b;
}

#endif  // CITY_HASH_H_
/*
 * Per-position summary of the bytes seen in the input keys.
 *   start, end: smallest and largest byte observed at this position.
 *   count:      per-byte tally maintained by main(); none of the helpers
 *               below read it.
 */
typedef struct Range {
    char start;
    char end;
    unsigned long count[256];
} Range;

/* Character classes, usable as bit flags. */
enum Class {
    Class_Num = 0x1,
    Class_Lowercase = 0x2,
    Class_UpperCase = 0x4,
    Class_Punct = 0x8,
};

/* Class of a single byte: digit, upper-case, lower-case, otherwise punct. */
static int char_class(const char ch) {
    if (ch >= '0' && ch <= '9')
        return Class_Num;
    if (ch >= 'A' && ch <= 'Z')
        return Class_UpperCase;
    if (ch >= 'a' && ch <= 'z')
        return Class_Lowercase;
    return Class_Punct;
}

/*
 * Bit mask made of the class of range.start OR-ed with the class of
 * range.end. Two ranges compare equal here when both of their endpoints
 * fall into the same pair of classes.
 */
static int range_class(const Range range) {
    return char_class(range.start) | char_class(range.end);
}

/*
 * Prints the range as a bracket expression "[lo-hi]", where lo is the first
 * character of the class range.start belongs to and hi is the last
 * character of the class range.end belongs to ('!' and '}' stand in for
 * the punctuation class).
 */
static void print_class(const Range range) {
    char lo;
    char hi;

    switch (char_class(range.start)) {
    case Class_Num:       lo = '0'; break;
    case Class_UpperCase: lo = 'A'; break;
    case Class_Lowercase: lo = 'a'; break;
    default:              lo = '!'; break;
    }

    switch (char_class(range.end)) {
    case Class_Num:       hi = '9'; break;
    case Class_UpperCase: hi = 'Z'; break;
    case Class_Lowercase: hi = 'z'; break;
    default:              hi = '}'; break;
    }

    printf("[%c-%c]", lo, hi);
}

/*
 * Returns 1 when ch is one of the characters main() escapes with a
 * backslash before printing it literally, 0 otherwise.
 */
static int is_special(const char ch) {
    switch (ch) {
    case '\\':
    case '[':
    case '{':
    case ')':
    case '(':
    case '+':
    case '*':
    case '?':
    case '.':
        return 1;
    default:
        return 0;
    }
}
101 | || ch == '.'; 102 | } 103 | 104 | int main(int argc, const char* argv[]) { 105 | 106 | if (argc > 1) { 107 | printf("keybuilder\n"); 108 | printf("\nDescription: keybuilder generates a regex from a series of strings separated by newlines\n"); 109 | printf("\nExample usage: `./keybuilder < keys.txt`\n"); 110 | printf("\nOptions:\n"); 111 | printf("\n -h Print this help\n"); 112 | printf("\n"); 113 | if (!(strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0)) { 114 | fprintf(stderr, "ERROR: unrecognized argument: %s\n", argv[1]); 115 | return 1; 116 | } else { 117 | return 0; 118 | } 119 | } 120 | 121 | char* line = NULL; 122 | size_t n; 123 | 124 | // begin by reading the first line 125 | ssize_t line_size = getline(&line, &n, stdin); 126 | if (line_size == -1) { 127 | if (errno != 0) { 128 | fprintf(stderr, "failed to read initial line from standard input: %s\n", strerror(errno)); 129 | } else { 130 | fprintf(stderr, "must send strings in standard input!\n"); 131 | } 132 | free(line); 133 | return 1; 134 | } 135 | 136 | Range* ranges = malloc(line_size * sizeof(*ranges)); 137 | 138 | // set the initial ranges, according to the first line 139 | for (ssize_t i = 0; i < line_size - 1; ++i) { 140 | ranges[i].start = line[i]; 141 | ranges[i].end = line[i]; 142 | } 143 | 144 | 145 | // now, read every line, while updating the ranges. We assume all 146 | // lines have the same size, and exit with an error if they don't 147 | ssize_t in_bytes; 148 | ssize_t line_count = 1; 149 | while ((in_bytes = getline(&line, &n, stdin)) > -1) { 150 | if (in_bytes != line_size) { 151 | fprintf(stderr, "WARNING: lines have different size!\n"); 152 | } 153 | line_size = line_size < in_bytes ? line_size : in_bytes; 154 | 155 | for (ssize_t i = 0; i < line_size - 1; ++i) { 156 | ranges[i].start = line[i] < ranges[i].start ? line[i] : ranges[i].start; 157 | ranges[i].end = line[i] > ranges[i].end ? 
line[i] : ranges[i].end; 158 | ranges[i].count[line[i]]++; 159 | } 160 | 161 | ++line_count; 162 | } 163 | free(line); 164 | line = NULL; 165 | 166 | // finally, group consecutive identical ranges together. For example, 167 | // `[0-9][0-9][0-9]` will turn into `[0-9]{3}` 168 | ssize_t i = 0; 169 | while (i < line_size - 1) { 170 | const Range range = ranges[i++]; 171 | if (range.start == range.end) { 172 | if (is_special(range.start)) { 173 | putchar('\\'); 174 | } 175 | putchar(range.start); 176 | } else { 177 | int repetitions = 1; 178 | Range other = ranges[i++]; 179 | while (range_class(range) == range_class(other)) { 180 | other = ranges[i++]; 181 | ++repetitions; 182 | } 183 | --i; 184 | 185 | print_class(range); 186 | if (repetitions > 1) { 187 | printf("{%d}", repetitions); 188 | } 189 | } 190 | } 191 | puts(""); 192 | 193 | free(ranges); 194 | return 0; 195 | } 196 | -------------------------------------------------------------------------------- /src/keyuser/src/main.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file main.cpp 3 | * @brief Main file for the benchmarking program. 4 | * 5 | * This file includes the main function and other helper functions for the benchmarking program. 6 | * It parses command line arguments and runs the benchmarks accordingly. 7 | */ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "benchmarks.hpp" 16 | #include "customHashes.hpp" 17 | #include "registry.hpp" 18 | 19 | std::string correctBenchUsage(){ 20 | return "Correct Usage: ./benchmarks -i -s -e -n \n" 21 | " --hashes ... 
: list of hashes to run\n " 22 | " -i or --insert: integer that represents the percentage of insertion operations\n" 23 | " -s or --search: integer that represents the percentage of search operations\n" 24 | " -e or --elimination: integer that represents the percentage of elimination operations\n" 25 | " The sum of -i, -s, and -e should be 100.\n" 26 | " -n or --num-operations: integer that represents the number of 'times to perform a hash operation on the benchmark\n" 27 | " -r or --repetitions: number of times to repeat the benchmark\n" 28 | " -seed: integer that represents the seed for the random number generator\n " 29 | " --test-distribution: test the distribution of '--hashes' specified hash functions\n" 30 | " --distribution: specify the randon distribution of the keys to be used in the benchmark\n" 31 | " --hash-performance: test the execution time of the hash functions over a set of keys\n" 32 | " -v or --verbose: print the results of each operation\n" 33 | " -h or --help: print this message\n" 34 | ; 35 | } 36 | 37 | BenchmarkParameters parseArgs(int argc, char** argv){ 38 | BenchmarkParameters args; 39 | args.verbose = false; 40 | int i = 1; 41 | while(i benchmarks; 118 | 119 | // Register Benchmarks with a macro for beter legibility 120 | REGISTER_ALL_BENCHMARKS 121 | 122 | // Load keys from standard input into memory 123 | std::vector keys; 124 | std::string line; 125 | while(std::getline(std::cin, line)){ 126 | keys.push_back(line); 127 | } 128 | 129 | // Fill default hash functions to run 130 | if(args.hashesToRun.empty()){ 131 | args.hashesToRun.push_back("STDHashBin"); 132 | args.hashesToRun.push_back("FNVHash"); 133 | } 134 | // Delete benchmarks that are not in the list of hashes to run 135 | std::vector filteredBenchmarks; 136 | for(auto& bench : benchmarks){ 137 | // Check if we should run this hash based on arguments 138 | if(std::find(args.hashesToRun.begin(), 139 | args.hashesToRun.end(), 140 | bench->getHashName()) != 
args.hashesToRun.end()) 141 | { 142 | filteredBenchmarks.push_back(bench); 143 | } else { 144 | delete bench; 145 | } 146 | } 147 | 148 | // Run benchmarks 149 | if(args.hashPerformance){ 150 | testHashPerformance(filteredBenchmarks, keys, args); 151 | freeBenchmarks(filteredBenchmarks); 152 | return 0; 153 | } 154 | if(args.testDistribution){ 155 | testDistribution(filteredBenchmarks, keys); 156 | freeBenchmarks(filteredBenchmarks); 157 | return 0; 158 | } 159 | benchmarkExecutor(filteredBenchmarks, keys, args); 160 | freeBenchmarks(filteredBenchmarks); 161 | 162 | } 163 | 164 | -------------------------------------------------------------------------------- /src/keyuser/src/customHashes.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file customHashes.hpp 3 | * @brief This file contains a collection of custom hash functions. 4 | */ 5 | 6 | #ifndef CUSTOM_HASHES_HPP 7 | #define CUSTOM_HASHES_HPP 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | /** 16 | * @brief Struct for CityHash. 17 | */ 18 | struct CityHash{ 19 | /** 20 | * @brief Function call operator that calculates the hash of a key. 21 | * 22 | * @param key The fixed lenght std::string key to hash. 23 | * @return The hash of the key. 24 | */ 25 | std::size_t operator()(const std::string& key) const; 26 | }; 27 | 28 | /** 29 | * @brief Struct for AbseilHash. 30 | */ 31 | struct AbseilHash{ 32 | /** 33 | * @brief Function call operator that calculates the hash of a key. 34 | * 35 | * @param key The fixed lenght std::string key to hash. 36 | * @return The hash of the key. 37 | */ 38 | std::size_t operator()(const std::string& key) const; 39 | }; 40 | 41 | /** 42 | * @brief Struct for STDHashSrc. 43 | */ 44 | struct STDHashSrc{ 45 | /** 46 | * @brief Function call operator that calculates the hash of a key. 47 | * 48 | * @param key The fixed lenght std::string key to hash. 49 | * @return The hash of the key. 
50 | */ 51 | std::size_t operator()(const std::string& key) const; 52 | }; 53 | 54 | /** 55 | * @brief Struct for STDHashBin. 56 | */ 57 | struct STDHashBin{ 58 | /** 59 | * @brief Function call operator that calculates the hash of a key. 60 | * 61 | * @param key The fixed-length std::string key to hash. 62 | * @return The hash of the key. 63 | */ 64 | std::size_t operator()(const std::string& key) const; 65 | }; 66 | 67 | /** 68 | * @brief Struct for FNVHash. 69 | */ 70 | struct FNVHash { 71 | /** 72 | * @brief Function call operator that calculates the hash of a key. 73 | * 74 | * @param key The fixed-length std::string key to hash. 75 | * @return The hash of the key. 76 | */ 77 | std::size_t operator()(const std::string& key) const; 78 | }; 79 | 80 | /** 81 | * @brief Struct for IPV4HashUnrolled. 82 | */ 83 | struct IPV4HashUnrolled{ 84 | /** 85 | * @brief Function call operator that calculates the hash of a key. 86 | * 87 | * @param key The fixed-length std::string key to hash. 88 | * @return The hash of the key. 89 | */ 90 | std::size_t operator()(const std::string& key) const; 91 | }; 92 | 93 | /** 94 | * @brief Struct for IPV4HashMove. 95 | */ 96 | struct IPV4HashMove{ 97 | /** 98 | * @brief Function call operator that calculates the hash of a key. 99 | * 100 | * @param key The fixed-length std::string key to hash. 101 | * @return The hash of the key. 102 | */ 103 | std::size_t operator()(const std::string& key) const; 104 | }; 105 | 106 | /** 107 | * @brief Struct for IntSimdHash. 108 | */ 109 | struct IntSimdHash{ 110 | /** 111 | * @brief Function call operator that calculates the hash of a key. 112 | * 113 | * @param key The fixed-length std::string key to hash. 114 | * @return The hash of the key. 115 | */ 116 | std::size_t operator()(const std::string& key) const; 117 | }; 118 | 119 | /** 120 | * @brief Struct for PextMurmurINTS. 121 | */ 122 | struct PextMurmurINTS{ 123 | /** 124 | * @brief Function call operator that calculates the hash of a key.
125 | * 126 | * @param key The fixed-length std::string key to hash. 127 | * @return The hash of the key. 128 | */ 129 | std::size_t operator()(const std::string& key) const; 130 | }; 131 | 132 | /** 133 | * @brief Struct for PextMurmurUrlComplex. 134 | */ 135 | struct PextMurmurUrlComplex{ 136 | /** 137 | * @brief Function call operator that calculates the hash of a key. 138 | * 139 | * @param key The fixed-length std::string key to hash. 140 | * @return The hash of the key. 141 | */ 142 | std::size_t operator()(const std::string& key) const; 143 | }; 144 | 145 | /** 146 | * @brief Macro that declares six structs with the same pattern. 147 | * 148 | * Each struct declares only a const function call operator taking a const std::string& key and returning std::size_t. 149 | * The structs are named by concatenating the prefixes Aes, Pext, OffXor, Naive, Gpt and Gperf with the NAME argument. 150 | * 151 | * @param NAME The suffix appended to each prefix to form the struct names. 152 | */ 153 | #define DECLARE_SYNTH_OFFXOR_NAIVE_CHATGPT_AND_GPERF_FUNCTIONS(NAME) \ 154 | struct Aes ## NAME { \ 155 | std::size_t operator()(const std::string& key) const; \ 156 | }; \ 157 | struct Pext ## NAME { \ 158 | std::size_t operator()(const std::string& key) const; \ 159 | }; \ 160 | struct OffXor ## NAME { \ 161 | std::size_t operator()(const std::string& key) const; \ 162 | }; \ 163 | struct Naive ## NAME { \ 164 | std::size_t operator()(const std::string& key) const; \ 165 | }; \ 166 | struct Gpt ## NAME { \ 167 | std::size_t operator()(const std::string& key) const; \ 168 | }; \ 169 | struct Gperf ## NAME { \ 170 | std::size_t operator()(const std::string& key) const; \ 171 | }; 172 | 173 | // Use the macro to declare the six structs for each key format below.
174 | DECLARE_SYNTH_OFFXOR_NAIVE_CHATGPT_AND_GPERF_FUNCTIONS(UrlComplex) 175 | DECLARE_SYNTH_OFFXOR_NAIVE_CHATGPT_AND_GPERF_FUNCTIONS(Url) 176 | DECLARE_SYNTH_OFFXOR_NAIVE_CHATGPT_AND_GPERF_FUNCTIONS(Mac) 177 | DECLARE_SYNTH_OFFXOR_NAIVE_CHATGPT_AND_GPERF_FUNCTIONS(CPF) 178 | DECLARE_SYNTH_OFFXOR_NAIVE_CHATGPT_AND_GPERF_FUNCTIONS(SSN) 179 | DECLARE_SYNTH_OFFXOR_NAIVE_CHATGPT_AND_GPERF_FUNCTIONS(IPV4) 180 | DECLARE_SYNTH_OFFXOR_NAIVE_CHATGPT_AND_GPERF_FUNCTIONS(IPV6) 181 | DECLARE_SYNTH_OFFXOR_NAIVE_CHATGPT_AND_GPERF_FUNCTIONS(INTS) 182 | 183 | /** 184 | * @brief Struct for NaiveSimdUrlComplex. 185 | */ 186 | struct NaiveSimdUrlComplex{ 187 | /** 188 | * @brief Function call operator that calculates the hash of a key. 189 | * 190 | * @param key The fixed-length std::string key to hash. 191 | * @return The hash of the key. 192 | */ 193 | std::size_t operator()(const std::string& key) const; 194 | }; 195 | 196 | /** 197 | * @brief Struct for NaiveSimdUrl. 198 | */ 199 | struct NaiveSimdUrl { 200 | /** 201 | * @brief Function call operator that calculates the hash of a key. 202 | * 203 | * @param key The fixed-length std::string key to hash. 204 | * @return The hash of the key. 205 | */ 206 | std::size_t operator()(const std::string& key) const; 207 | }; 208 | 209 | /** 210 | * @brief Struct for NaiveSimdIPV6. 211 | */ 212 | struct NaiveSimdIPV6 { 213 | /** 214 | * @brief Function call operator that calculates the hash of a key. 215 | * 216 | * @param key The fixed-length std::string key to hash. 217 | * @return The hash of the key. 218 | */ 219 | std::size_t operator()(const std::string& key) const; 220 | }; 221 | 222 | /** 223 | * @brief Struct for NaiveSimdINTS. 224 | */ 225 | struct NaiveSimdINTS { 226 | /** 227 | * @brief Function call operator that calculates the hash of a key. 228 | * 229 | * @param key The fixed-length std::string key to hash. 230 | * @return The hash of the key.
231 | */ 232 | std::size_t operator()(const std::string& key) const; 233 | }; 234 | 235 | #endif 236 | -------------------------------------------------------------------------------- /scripts/keyuser_interpreter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This script reads multiple .csv files produced by keyuser, extracts relevant 4 | # information and calculate metrics to compare the different hash functions 5 | 6 | import os 7 | import argparse 8 | import importlib 9 | import re 10 | 11 | # Data science boys 12 | import numpy as np 13 | import pandas as pd 14 | import matplotlib.pyplot as plt 15 | from scipy import stats 16 | 17 | def distribution_test(key, value): 18 | 19 | n = len(value) 20 | chi_worst_case = ((n-1)**2) + (n-1) 21 | 22 | # Calculate the observed frequencies of each value 23 | observed_freq, _ = np.histogram(value, bins=len(np.unique(value))) 24 | 25 | # Calculate the expected frequencies for a uniform distribution 26 | expected_freq = np.full_like(observed_freq, len(value) / len(np.unique(value))) 27 | 28 | # Perform the Chi-Square Goodness of Fit Test 29 | try: 30 | chi2, p = stats.chisquare(observed_freq, expected_freq) 31 | except Exception: 32 | p = 0.0 33 | chi2 = chi_worst_case 34 | 35 | skewness = stats.skew(value) 36 | 37 | return pd.DataFrame(pd.DataFrame({"Hash Function": [key], "Skewness": [skewness], "Chi-Test": [chi2], "Uniform?": [p > 0.05]})) 38 | 39 | def handle_distribution_analysis(args): 40 | try: 41 | abs_path = os.path.abspath(args.distribution) 42 | spec = importlib.util.spec_from_file_location(args.distribution.replace(".py",""),abs_path) 43 | arrays = importlib.util.module_from_spec(spec) 44 | spec.loader.exec_module(arrays) 45 | except ImportError: 46 | print(args.distribution, "is an invalid import! 
Please make sure that the module exists and is in the same directory as this script.") 47 | print("If it does not exist, you can obtain it by running keyuser with --test-distribution or bench-runner with --histogram ", args.distribution) 48 | exit(1) 49 | 50 | regex_name = os.path.basename(args.distribution).replace(".py", "") 51 | 52 | if args.hash_functions is None: 53 | args.hash_functions = [] 54 | for key, value in arrays.distributions.items(): 55 | args.hash_functions.append(key) 56 | 57 | result = None 58 | if args.plot_graph: 59 | file_destination = args.output_destination + regex_name + "_histogram.svg" 60 | _, ax = plt.subplots(figsize=(10, 5)) 61 | 62 | for key, value in arrays.distributions.items(): 63 | key = key.replace("array_", "") 64 | 65 | if args.plot_graph: 66 | ax.hist(value, label=key, alpha=0.5) 67 | 68 | if result is None: 69 | result = distribution_test(key, value) 70 | else: 71 | result = pd.concat([result, distribution_test(key, value)], ignore_index=True) 72 | 73 | if args.plot_graph: 74 | ax.legend(loc='upper left', bbox_to_anchor=(1, 1)) 75 | plt.tight_layout() 76 | plt.savefig(file_destination) 77 | print("Histograms saved in: ", file_destination) 78 | 79 | print("Below DataFrame from distribution file: ", args.distribution) 80 | print(result) 81 | output_path = args.output_destination + regex_name + "_chitest.csv" 82 | print("See all results in: ", output_path) 83 | result.to_csv(output_path, index=False) 84 | 85 | ######################## 86 | # Performance Analysis # 87 | ######################## 88 | 89 | def mannwhitneyu_from_dataframe(args, regex_name, df, column_to_group, column_to_compare): 90 | grouped = df.groupby(column_to_group) 91 | 92 | # Extract the groups from the DataFrame 93 | groups = {} 94 | groups = [group for group in grouped.groups] 95 | 96 | # Initialize an empty DataFrame 97 | result = pd.DataFrame() 98 | 99 | for hashFunc1 in groups: 100 | for hashFunc2 in groups: 101 | if hashFunc1 == hashFunc2: 102 | 
continue 103 | 104 | # Calculate the Mann-Whitney U test between the first two groups 105 | _, p = stats.mannwhitneyu(grouped.get_group(hashFunc1)[column_to_compare], grouped.get_group(hashFunc2)[column_to_compare]) 106 | 107 | # interpret 108 | alpha = 0.05 109 | 110 | # Append a new row to the DataFrame 111 | result = pd.concat([result, pd.DataFrame({"Hash Function 1": [hashFunc1], "Hash Function 2": [hashFunc2], "p-value": [p], "Same Distribution?": [p > alpha]})], ignore_index=True) 112 | 113 | print("Below DataFrame from Regex: ", regex_name) 114 | print(result) 115 | output_path = args.output_destination + regex_name + "_" + column_to_compare + "_mannwhitneyu.csv" 116 | print("See all results in: ", output_path) 117 | result.to_csv(output_path, index=False) 118 | 119 | def box_plot_dataframe(args, df, regex_name): 120 | grouped = df.groupby(['Execution Mode', 'Num Operations', 'Num Keys', 'Insertions (%)', 'Searches (%)', 'Eliminatons(%)']) 121 | 122 | # Extract the groups from the DataFrame 123 | groups = {} 124 | groups = [group for group in grouped.groups] 125 | 126 | for group in groups: 127 | temp = grouped.get_group(group)[['Execution Time (s)', 'Hash Function']] 128 | 129 | ax = grouped.get_group(group)[['Execution Time (s)', 'Hash Function']].boxplot(by='Hash Function', rot=45, showmeans=True) 130 | ax.set_ylabel('Execution Time (s)') 131 | ax.set_xlabel('') 132 | ax.set_title(group) 133 | plt.suptitle('') 134 | plt.tight_layout() 135 | plt.savefig(args.output_destination+regex_name+str(group)+'.svg') 136 | 137 | print("Boxplots saved in: ", args.output_destination) 138 | 139 | def performance_from_dataframe(args, df, regex_name): 140 | grouped = df.groupby(['Execution Mode', 'Num Operations', 'Num Keys', 'Insertions (%)', 'Searches (%)', 'Eliminatons(%)', 'Hash Function']) 141 | 142 | # Extract the groups from the DataFrame 143 | groups = {} 144 | groups = [group for group in grouped.groups] 145 | 146 | all_data = {} 147 | 148 | for group in groups: 
149 | temp = grouped.get_group(group)[['Execution Time (s)', 'Collision Count']] 150 | hash_func_name = group[-1] 151 | 152 | if hash_func_name not in all_data: 153 | all_data[hash_func_name] = [(temp['Execution Time (s)'].mean(), temp['Collision Count'].mean())] 154 | else: 155 | all_data[hash_func_name].append((temp['Execution Time (s)'].mean(), temp['Collision Count'].mean())) 156 | 157 | # Geometric mean of all_data 158 | result = pd.DataFrame() 159 | 160 | for data in all_data: 161 | samples_geotime = 1.0 162 | samples_collision = 1.0 163 | for sample in all_data[data]: 164 | samples_geotime *= sample[0] 165 | if sample[1] != 0: 166 | samples_collision *= sample[1] 167 | samples_geotime = samples_geotime ** (1/len(all_data[data])) 168 | samples_collision = samples_collision ** (1/len(all_data[data])) 169 | result = pd.concat([result, pd.DataFrame({"Func Name": [data], "GeoTime": [samples_geotime], "GeoCollision": [samples_collision]})], ignore_index=True) 170 | 171 | print("Below DataFrame from Regex: ", regex_name) 172 | print(result) 173 | output_path = args.output_destination + regex_name + "_geomean.csv" 174 | print("See all results in: ", output_path) 175 | result.to_csv(output_path, index=False) 176 | 177 | # Normalize "Chi-Test" column using the "STDHashSrc" as the reference 178 | # result["GeoTime"] = 100.0 - (result["GeoTime"] / result[result["Func Name"] == "STDHashSrc"]["GeoTime"].values[0]) * 100.0 179 | # print("Normalized GeoTime %") 180 | # print(result) 181 | 182 | def handle_performance_analysis(args): 183 | 184 | # Load CSV files into pandas dataframe 185 | csv_files = args.performance 186 | dataframes = [pd.read_csv(file) for file in csv_files] 187 | 188 | regex_name = os.path.basename(csv_files[0]).replace(".csv", "") 189 | 190 | # Concatenate dataframes 191 | df = pd.concat(dataframes, ignore_index=True) 192 | 193 | # Calculate the Mann-Whitney U test 194 | mannwhitneyu_from_dataframe(args, regex_name, df, 'Hash Function', 'Execution Time 
(s)') 195 | mannwhitneyu_from_dataframe(args, regex_name, df, 'Hash Function', 'Collision Count') 196 | 197 | if args.plot_graph: 198 | box_plot_dataframe(args, df, regex_name) 199 | 200 | performance_from_dataframe(args, df, regex_name) 201 | 202 | def main(): 203 | parser = argparse.ArgumentParser(description="Keyuser Interpreter") 204 | parser.add_argument("-d", "--distribution", type=str, default="", help="Name of the distribution file to interpret. Exclusive with -p option.") 205 | parser.add_argument("-p", "--performance", nargs='*', type=str, default="", help="Name of the csv performance files to interpret. Exclusive with -d option.") 206 | parser.add_argument("-pg", "--plot-graph", action='store_true', help="Option to plot the results in graphs.") 207 | parser.add_argument("-od", "--output-destination", type=str, default="results/", help="Output path to output graphs. Default is current file.") 208 | parser.add_argument("-fp", "--full-print", action='store_true', help="Print the entire dataframe.") 209 | parser.add_argument("-hf", "--hash-functions", nargs='*', type=str, help="Name of the hash functions to analyize.") 210 | 211 | args = parser.parse_args() 212 | 213 | if args.full_print: 214 | pd.set_option('display.max_rows', None) 215 | pd.set_option('display.max_columns', None) 216 | pd.set_option('display.width', None) 217 | pd.set_option('display.max_colwidth', None) 218 | 219 | if args.performance: 220 | handle_performance_analysis(args) 221 | elif args.distribution: 222 | handle_distribution_analysis(args) 223 | 224 | main() 225 | -------------------------------------------------------------------------------- /src/sepe-runner/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "anstream" 7 | version = "0.6.7" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "4cd2405b3ac1faab2990b74d728624cd9fd115651fcecc7c2d8daf01376275ba" 10 | dependencies = [ 11 | "anstyle", 12 | "anstyle-parse", 13 | "anstyle-query", 14 | "anstyle-wincon", 15 | "colorchoice", 16 | "utf8parse", 17 | ] 18 | 19 | [[package]] 20 | name = "anstyle" 21 | version = "1.0.4" 22 | source = "registry+https://github.com/rust-lang/crates.io-index" 23 | checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" 24 | 25 | [[package]] 26 | name = "anstyle-parse" 27 | version = "0.2.3" 28 | source = "registry+https://github.com/rust-lang/crates.io-index" 29 | checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" 30 | dependencies = [ 31 | "utf8parse", 32 | ] 33 | 34 | [[package]] 35 | name = "anstyle-query" 36 | version = "1.0.2" 37 | source = "registry+https://github.com/rust-lang/crates.io-index" 38 | checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" 39 | dependencies = [ 40 | "windows-sys", 41 | ] 42 | 43 | [[package]] 44 | name = "anstyle-wincon" 45 | version = "3.0.2" 46 | source = "registry+https://github.com/rust-lang/crates.io-index" 47 | checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" 48 | dependencies = [ 49 | "anstyle", 50 | "windows-sys", 51 | ] 52 | 53 | [[package]] 54 | name = "clap" 55 | version = "4.4.17" 56 | source = "registry+https://github.com/rust-lang/crates.io-index" 57 | checksum = "80932e03c33999b9235edb8655bc9df3204adc9887c2f95b50cb1deb9fd54253" 58 | dependencies = [ 59 | "clap_builder", 60 | "clap_derive", 61 | ] 62 | 63 | [[package]] 64 | name = "clap_builder" 65 | version = "4.4.17" 66 | source = "registry+https://github.com/rust-lang/crates.io-index" 67 | checksum = "d6c0db58c659eef1c73e444d298c27322a1b52f6927d2ad470c0c0f96fa7b8fa" 68 | dependencies = [ 69 | "anstream", 70 | 
"anstyle", 71 | "clap_lex", 72 | "strsim", 73 | ] 74 | 75 | [[package]] 76 | name = "clap_derive" 77 | version = "4.4.7" 78 | source = "registry+https://github.com/rust-lang/crates.io-index" 79 | checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442" 80 | dependencies = [ 81 | "heck", 82 | "proc-macro2", 83 | "quote", 84 | "syn", 85 | ] 86 | 87 | [[package]] 88 | name = "clap_lex" 89 | version = "0.6.0" 90 | source = "registry+https://github.com/rust-lang/crates.io-index" 91 | checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" 92 | 93 | [[package]] 94 | name = "colorchoice" 95 | version = "1.0.0" 96 | source = "registry+https://github.com/rust-lang/crates.io-index" 97 | checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" 98 | 99 | [[package]] 100 | name = "equivalent" 101 | version = "1.0.1" 102 | source = "registry+https://github.com/rust-lang/crates.io-index" 103 | checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" 104 | 105 | [[package]] 106 | name = "hashbrown" 107 | version = "0.14.3" 108 | source = "registry+https://github.com/rust-lang/crates.io-index" 109 | checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" 110 | 111 | [[package]] 112 | name = "heck" 113 | version = "0.4.1" 114 | source = "registry+https://github.com/rust-lang/crates.io-index" 115 | checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" 116 | 117 | [[package]] 118 | name = "indexmap" 119 | version = "2.1.0" 120 | source = "registry+https://github.com/rust-lang/crates.io-index" 121 | checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" 122 | dependencies = [ 123 | "equivalent", 124 | "hashbrown", 125 | ] 126 | 127 | [[package]] 128 | name = "memchr" 129 | version = "2.7.1" 130 | source = "registry+https://github.com/rust-lang/crates.io-index" 131 | checksum = 
"523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" 132 | 133 | [[package]] 134 | name = "proc-macro2" 135 | version = "1.0.76" 136 | source = "registry+https://github.com/rust-lang/crates.io-index" 137 | checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" 138 | dependencies = [ 139 | "unicode-ident", 140 | ] 141 | 142 | [[package]] 143 | name = "quote" 144 | version = "1.0.35" 145 | source = "registry+https://github.com/rust-lang/crates.io-index" 146 | checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" 147 | dependencies = [ 148 | "proc-macro2", 149 | ] 150 | 151 | [[package]] 152 | name = "sepe-runner" 153 | version = "0.1.0" 154 | dependencies = [ 155 | "clap", 156 | "toml", 157 | ] 158 | 159 | [[package]] 160 | name = "serde" 161 | version = "1.0.195" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02" 164 | dependencies = [ 165 | "serde_derive", 166 | ] 167 | 168 | [[package]] 169 | name = "serde_derive" 170 | version = "1.0.195" 171 | source = "registry+https://github.com/rust-lang/crates.io-index" 172 | checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" 173 | dependencies = [ 174 | "proc-macro2", 175 | "quote", 176 | "syn", 177 | ] 178 | 179 | [[package]] 180 | name = "serde_spanned" 181 | version = "0.6.5" 182 | source = "registry+https://github.com/rust-lang/crates.io-index" 183 | checksum = "eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1" 184 | dependencies = [ 185 | "serde", 186 | ] 187 | 188 | [[package]] 189 | name = "strsim" 190 | version = "0.10.0" 191 | source = "registry+https://github.com/rust-lang/crates.io-index" 192 | checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" 193 | 194 | [[package]] 195 | name = "syn" 196 | version = "2.0.48" 197 | source = "registry+https://github.com/rust-lang/crates.io-index" 
198 | checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" 199 | dependencies = [ 200 | "proc-macro2", 201 | "quote", 202 | "unicode-ident", 203 | ] 204 | 205 | [[package]] 206 | name = "toml" 207 | version = "0.8.8" 208 | source = "registry+https://github.com/rust-lang/crates.io-index" 209 | checksum = "a1a195ec8c9da26928f773888e0742ca3ca1040c6cd859c919c9f59c1954ab35" 210 | dependencies = [ 211 | "serde", 212 | "serde_spanned", 213 | "toml_datetime", 214 | "toml_edit", 215 | ] 216 | 217 | [[package]] 218 | name = "toml_datetime" 219 | version = "0.6.5" 220 | source = "registry+https://github.com/rust-lang/crates.io-index" 221 | checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" 222 | dependencies = [ 223 | "serde", 224 | ] 225 | 226 | [[package]] 227 | name = "toml_edit" 228 | version = "0.21.0" 229 | source = "registry+https://github.com/rust-lang/crates.io-index" 230 | checksum = "d34d383cd00a163b4a5b85053df514d45bc330f6de7737edfe0a93311d1eaa03" 231 | dependencies = [ 232 | "indexmap", 233 | "serde", 234 | "serde_spanned", 235 | "toml_datetime", 236 | "winnow", 237 | ] 238 | 239 | [[package]] 240 | name = "unicode-ident" 241 | version = "1.0.12" 242 | source = "registry+https://github.com/rust-lang/crates.io-index" 243 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 244 | 245 | [[package]] 246 | name = "utf8parse" 247 | version = "0.2.1" 248 | source = "registry+https://github.com/rust-lang/crates.io-index" 249 | checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" 250 | 251 | [[package]] 252 | name = "windows-sys" 253 | version = "0.52.0" 254 | source = "registry+https://github.com/rust-lang/crates.io-index" 255 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 256 | dependencies = [ 257 | "windows-targets", 258 | ] 259 | 260 | [[package]] 261 | name = "windows-targets" 262 | version = "0.52.0" 263 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 264 | checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" 265 | dependencies = [ 266 | "windows_aarch64_gnullvm", 267 | "windows_aarch64_msvc", 268 | "windows_i686_gnu", 269 | "windows_i686_msvc", 270 | "windows_x86_64_gnu", 271 | "windows_x86_64_gnullvm", 272 | "windows_x86_64_msvc", 273 | ] 274 | 275 | [[package]] 276 | name = "windows_aarch64_gnullvm" 277 | version = "0.52.0" 278 | source = "registry+https://github.com/rust-lang/crates.io-index" 279 | checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" 280 | 281 | [[package]] 282 | name = "windows_aarch64_msvc" 283 | version = "0.52.0" 284 | source = "registry+https://github.com/rust-lang/crates.io-index" 285 | checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" 286 | 287 | [[package]] 288 | name = "windows_i686_gnu" 289 | version = "0.52.0" 290 | source = "registry+https://github.com/rust-lang/crates.io-index" 291 | checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" 292 | 293 | [[package]] 294 | name = "windows_i686_msvc" 295 | version = "0.52.0" 296 | source = "registry+https://github.com/rust-lang/crates.io-index" 297 | checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" 298 | 299 | [[package]] 300 | name = "windows_x86_64_gnu" 301 | version = "0.52.0" 302 | source = "registry+https://github.com/rust-lang/crates.io-index" 303 | checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" 304 | 305 | [[package]] 306 | name = "windows_x86_64_gnullvm" 307 | version = "0.52.0" 308 | source = "registry+https://github.com/rust-lang/crates.io-index" 309 | checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" 310 | 311 | [[package]] 312 | name = "windows_x86_64_msvc" 313 | version = "0.52.0" 314 | source = "registry+https://github.com/rust-lang/crates.io-index" 315 | checksum = 
"dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" 316 | 317 | [[package]] 318 | name = "winnow" 319 | version = "0.5.34" 320 | source = "registry+https://github.com/rust-lang/crates.io-index" 321 | checksum = "b7cf47b659b318dccbd69cc4797a39ae128f533dce7902a1096044d1967b9c16" 322 | dependencies = [ 323 | "memchr", 324 | ] 325 | -------------------------------------------------------------------------------- /src/keygen/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "anstream" 7 | version = "0.6.11" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" 10 | dependencies = [ 11 | "anstyle", 12 | "anstyle-parse", 13 | "anstyle-query", 14 | "anstyle-wincon", 15 | "colorchoice", 16 | "utf8parse", 17 | ] 18 | 19 | [[package]] 20 | name = "anstyle" 21 | version = "1.0.6" 22 | source = "registry+https://github.com/rust-lang/crates.io-index" 23 | checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" 24 | 25 | [[package]] 26 | name = "anstyle-parse" 27 | version = "0.2.3" 28 | source = "registry+https://github.com/rust-lang/crates.io-index" 29 | checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" 30 | dependencies = [ 31 | "utf8parse", 32 | ] 33 | 34 | [[package]] 35 | name = "anstyle-query" 36 | version = "1.0.2" 37 | source = "registry+https://github.com/rust-lang/crates.io-index" 38 | checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" 39 | dependencies = [ 40 | "windows-sys", 41 | ] 42 | 43 | [[package]] 44 | name = "anstyle-wincon" 45 | version = "3.0.2" 46 | source = "registry+https://github.com/rust-lang/crates.io-index" 47 | checksum = 
"1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" 48 | dependencies = [ 49 | "anstyle", 50 | "windows-sys", 51 | ] 52 | 53 | [[package]] 54 | name = "autocfg" 55 | version = "1.1.0" 56 | source = "registry+https://github.com/rust-lang/crates.io-index" 57 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 58 | 59 | [[package]] 60 | name = "cfg-if" 61 | version = "1.0.0" 62 | source = "registry+https://github.com/rust-lang/crates.io-index" 63 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 64 | 65 | [[package]] 66 | name = "clap" 67 | version = "4.4.18" 68 | source = "registry+https://github.com/rust-lang/crates.io-index" 69 | checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c" 70 | dependencies = [ 71 | "clap_builder", 72 | "clap_derive", 73 | ] 74 | 75 | [[package]] 76 | name = "clap_builder" 77 | version = "4.4.18" 78 | source = "registry+https://github.com/rust-lang/crates.io-index" 79 | checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7" 80 | dependencies = [ 81 | "anstream", 82 | "anstyle", 83 | "clap_lex", 84 | "strsim", 85 | ] 86 | 87 | [[package]] 88 | name = "clap_derive" 89 | version = "4.4.7" 90 | source = "registry+https://github.com/rust-lang/crates.io-index" 91 | checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442" 92 | dependencies = [ 93 | "heck", 94 | "proc-macro2", 95 | "quote", 96 | "syn", 97 | ] 98 | 99 | [[package]] 100 | name = "clap_lex" 101 | version = "0.6.0" 102 | source = "registry+https://github.com/rust-lang/crates.io-index" 103 | checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" 104 | 105 | [[package]] 106 | name = "colorchoice" 107 | version = "1.0.0" 108 | source = "registry+https://github.com/rust-lang/crates.io-index" 109 | checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" 110 | 111 | [[package]] 112 | name = "getrandom" 113 
| version = "0.2.12" 114 | source = "registry+https://github.com/rust-lang/crates.io-index" 115 | checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" 116 | dependencies = [ 117 | "cfg-if", 118 | "libc", 119 | "wasi", 120 | ] 121 | 122 | [[package]] 123 | name = "heck" 124 | version = "0.4.1" 125 | source = "registry+https://github.com/rust-lang/crates.io-index" 126 | checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" 127 | 128 | [[package]] 129 | name = "keygen" 130 | version = "0.1.0" 131 | dependencies = [ 132 | "clap", 133 | "rand", 134 | "rand_distr", 135 | ] 136 | 137 | [[package]] 138 | name = "libc" 139 | version = "0.2.153" 140 | source = "registry+https://github.com/rust-lang/crates.io-index" 141 | checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" 142 | 143 | [[package]] 144 | name = "libm" 145 | version = "0.2.8" 146 | source = "registry+https://github.com/rust-lang/crates.io-index" 147 | checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" 148 | 149 | [[package]] 150 | name = "num-traits" 151 | version = "0.2.17" 152 | source = "registry+https://github.com/rust-lang/crates.io-index" 153 | checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" 154 | dependencies = [ 155 | "autocfg", 156 | "libm", 157 | ] 158 | 159 | [[package]] 160 | name = "ppv-lite86" 161 | version = "0.2.17" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" 164 | 165 | [[package]] 166 | name = "proc-macro2" 167 | version = "1.0.78" 168 | source = "registry+https://github.com/rust-lang/crates.io-index" 169 | checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" 170 | dependencies = [ 171 | "unicode-ident", 172 | ] 173 | 174 | [[package]] 175 | name = "quote" 176 | version = "1.0.35" 177 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 178 | checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" 179 | dependencies = [ 180 | "proc-macro2", 181 | ] 182 | 183 | [[package]] 184 | name = "rand" 185 | version = "0.8.5" 186 | source = "registry+https://github.com/rust-lang/crates.io-index" 187 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 188 | dependencies = [ 189 | "libc", 190 | "rand_chacha", 191 | "rand_core", 192 | ] 193 | 194 | [[package]] 195 | name = "rand_chacha" 196 | version = "0.3.1" 197 | source = "registry+https://github.com/rust-lang/crates.io-index" 198 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 199 | dependencies = [ 200 | "ppv-lite86", 201 | "rand_core", 202 | ] 203 | 204 | [[package]] 205 | name = "rand_core" 206 | version = "0.6.4" 207 | source = "registry+https://github.com/rust-lang/crates.io-index" 208 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 209 | dependencies = [ 210 | "getrandom", 211 | ] 212 | 213 | [[package]] 214 | name = "rand_distr" 215 | version = "0.4.3" 216 | source = "registry+https://github.com/rust-lang/crates.io-index" 217 | checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" 218 | dependencies = [ 219 | "num-traits", 220 | "rand", 221 | ] 222 | 223 | [[package]] 224 | name = "strsim" 225 | version = "0.10.0" 226 | source = "registry+https://github.com/rust-lang/crates.io-index" 227 | checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" 228 | 229 | [[package]] 230 | name = "syn" 231 | version = "2.0.48" 232 | source = "registry+https://github.com/rust-lang/crates.io-index" 233 | checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" 234 | dependencies = [ 235 | "proc-macro2", 236 | "quote", 237 | "unicode-ident", 238 | ] 239 | 240 | [[package]] 241 | name = "unicode-ident" 242 | version = "1.0.12" 243 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 244 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 245 | 246 | [[package]] 247 | name = "utf8parse" 248 | version = "0.2.1" 249 | source = "registry+https://github.com/rust-lang/crates.io-index" 250 | checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" 251 | 252 | [[package]] 253 | name = "wasi" 254 | version = "0.11.0+wasi-snapshot-preview1" 255 | source = "registry+https://github.com/rust-lang/crates.io-index" 256 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 257 | 258 | [[package]] 259 | name = "windows-sys" 260 | version = "0.52.0" 261 | source = "registry+https://github.com/rust-lang/crates.io-index" 262 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 263 | dependencies = [ 264 | "windows-targets", 265 | ] 266 | 267 | [[package]] 268 | name = "windows-targets" 269 | version = "0.52.0" 270 | source = "registry+https://github.com/rust-lang/crates.io-index" 271 | checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" 272 | dependencies = [ 273 | "windows_aarch64_gnullvm", 274 | "windows_aarch64_msvc", 275 | "windows_i686_gnu", 276 | "windows_i686_msvc", 277 | "windows_x86_64_gnu", 278 | "windows_x86_64_gnullvm", 279 | "windows_x86_64_msvc", 280 | ] 281 | 282 | [[package]] 283 | name = "windows_aarch64_gnullvm" 284 | version = "0.52.0" 285 | source = "registry+https://github.com/rust-lang/crates.io-index" 286 | checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" 287 | 288 | [[package]] 289 | name = "windows_aarch64_msvc" 290 | version = "0.52.0" 291 | source = "registry+https://github.com/rust-lang/crates.io-index" 292 | checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" 293 | 294 | [[package]] 295 | name = "windows_i686_gnu" 296 | version = "0.52.0" 297 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 298 | checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" 299 | 300 | [[package]] 301 | name = "windows_i686_msvc" 302 | version = "0.52.0" 303 | source = "registry+https://github.com/rust-lang/crates.io-index" 304 | checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" 305 | 306 | [[package]] 307 | name = "windows_x86_64_gnu" 308 | version = "0.52.0" 309 | source = "registry+https://github.com/rust-lang/crates.io-index" 310 | checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" 311 | 312 | [[package]] 313 | name = "windows_x86_64_gnullvm" 314 | version = "0.52.0" 315 | source = "registry+https://github.com/rust-lang/crates.io-index" 316 | checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" 317 | 318 | [[package]] 319 | name = "windows_x86_64_msvc" 320 | version = "0.52.0" 321 | source = "registry+https://github.com/rust-lang/crates.io-index" 322 | checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" 323 | -------------------------------------------------------------------------------- /src/keygen/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | io::{stdout, BufWriter, Write}, 3 | num::NonZeroU32, 4 | }; 5 | 6 | use clap::{Parser, ValueEnum}; 7 | use rand::{Rng, SeedableRng}; 8 | use rand_distr::{Distribution as Dist, Normal, Uniform}; 9 | 10 | /// Represents a List (e.g. `[0-9]`) of possible values 11 | #[derive(Debug)] 12 | struct List { 13 | inner: String, 14 | /// A complement is a list with a caret: [^0-9]. 
It means we should generate all values BUT 15 | /// those in the list 16 | complement: bool, 17 | } 18 | 19 | impl List { 20 | /// Returns a random character from the list 21 | pub fn generate>(&self, r: &mut R, d: &D) -> char { 22 | if !self.complement { 23 | self.inner 24 | .chars() 25 | .nth((d.sample(r).rem_euclid(1.0) * (self.inner.len() - 1) as f64) as usize) 26 | .unwrap() 27 | } else { 28 | let mut ch = rand::random(); 29 | while self.inner.contains(ch) { 30 | ch = (ch as u8 + 1) as char; 31 | } 32 | ch 33 | } 34 | } 35 | 36 | /// Returns the specified character from the list, decrementing the `i` accordingly 37 | pub fn generate_inc(&self, i: &mut u64) -> char { 38 | if !self.complement { 39 | let ch = self.inner.chars().cycle().nth(*i as usize).unwrap(); 40 | let n = self.inner.chars().count() as u64; 41 | *i = if *i < n { 0 } else { *i / n }; 42 | ch 43 | } else { 44 | let mut ch = 45 | char::from_u32(*i as u32 + 32).expect("failed to generate incremental '.'"); 46 | while self.inner.contains(ch) { 47 | ch = (ch as u8 + 1) as char; 48 | } 49 | *i = (*i).saturating_sub(1); 50 | ch 51 | } 52 | } 53 | } 54 | 55 | /// Possible Regex symbols 56 | #[derive(Debug)] 57 | enum RegexSymbol { 58 | /// stands for the exact character it contains 59 | Literal(char), 60 | /// a parenthesized group, e.g. `(inner_regex)` 61 | Group(Vec), 62 | /// list, e.g. 
`[0-9]` 63 | List(List), 64 | } 65 | 66 | /// A Regex is a symbol and its possible modifiers 67 | /// 68 | /// In this program, we only accept repetitions `{n}` as modifiers 69 | /// The operators `+`, `*` and `?` are all not accepted 70 | #[derive(Debug)] 71 | struct Regex { 72 | symbol: RegexSymbol, 73 | repetitions: Option, 74 | } 75 | 76 | impl Regex { 77 | /// Initializes a new Regex from a symbol, setting all modifiers to false 78 | pub fn new(symbol: RegexSymbol) -> Self { 79 | Self { 80 | symbol, 81 | repetitions: None, 82 | } 83 | } 84 | 85 | /// Returns random characters according to the regular expression rules 86 | pub fn generate>(&self, r: &mut R, d: &D) -> String { 87 | let repetitions = self.repetitions.map(|r| r.get()).unwrap_or(1); 88 | let mut s = String::with_capacity(repetitions as usize); 89 | for _ in 0..repetitions { 90 | match &self.symbol { 91 | RegexSymbol::Literal(literal) => s.push(*literal), 92 | RegexSymbol::Group(group) => s.push_str(&generate(group, r, d)), 93 | RegexSymbol::List(list) => s.push(list.generate(r, d)), 94 | } 95 | } 96 | 97 | s 98 | } 99 | 100 | /// Returns incremental characters according to the regular expression rules 101 | pub fn generate_inc(&self, i: &mut u64) -> String { 102 | let repetitions = self.repetitions.map(|r| r.get()).unwrap_or(1); 103 | let mut s = String::with_capacity(repetitions as usize); 104 | for _ in 0..repetitions { 105 | match &self.symbol { 106 | RegexSymbol::Literal(literal) => s.insert(0, *literal), 107 | RegexSymbol::Group(group) => s = generate_inc(group, i) + &s, 108 | RegexSymbol::List(list) => s.insert(0, list.generate_inc(i)), 109 | } 110 | } 111 | 112 | s 113 | } 114 | } 115 | 116 | /// generate a random regex 117 | fn generate>(regexes: &[Regex], r: &mut R, d: &D) -> String { 118 | let mut s = String::with_capacity(regexes.len()); 119 | for regex in regexes { 120 | s.push_str(®ex.generate(r, d)); 121 | } 122 | s 123 | } 124 | 125 | /// generate a specific regex, according to `i` 
126 | fn generate_inc(regexes: &[Regex], i: &mut u64) -> String { 127 | let mut s = String::with_capacity(regexes.len()); 128 | for regex in regexes.iter().rev() { 129 | s = regex.generate_inc(i) + &s; 130 | } 131 | s 132 | } 133 | 134 | /// parse a list ([0-9]) 135 | fn parse_list(chars: &mut std::str::Chars) -> Regex { 136 | let mut inner = String::new(); 137 | 138 | let first = chars.next().unwrap(); 139 | let complement = first == '^'; 140 | if !complement { 141 | inner.push(first); 142 | } 143 | 144 | while let Some(ch) = chars.next() { 145 | if ch == ']' { 146 | break; 147 | } else if ch == '-' { 148 | let prev = match inner.pop() { 149 | Some(ch) => ch, 150 | None => { 151 | inner.push('-'); 152 | continue; 153 | } 154 | }; 155 | 156 | let next = chars.next().unwrap(); 157 | if next == ']' { 158 | inner.push(prev); 159 | inner.push('-'); 160 | break; 161 | } 162 | 163 | for ch in prev..=next { 164 | inner.push(ch); 165 | } 166 | } else { 167 | inner.push(ch); 168 | } 169 | } 170 | 171 | Regex::new(RegexSymbol::List(List { inner, complement })) 172 | } 173 | 174 | /// parse a repetition count (the `n` of `{n}`, read after the opening `{` up to and including the closing `}`); panics unless `n` is an integer > 0 175 | fn parse_repetitions(chars: &mut std::str::Chars) -> NonZeroU32 { 176 | let mut s = String::new(); 177 | for ch in chars.by_ref() { 178 | if ch == '}' { 179 | break; 180 | } 181 | s.push(ch); 182 | } 183 | 184 | let uint: u32 = s 185 | .trim() 186 | .parse() 187 | .unwrap_or_else(|_| panic!("failed to parse: {} as an unsigned integer", s)); 188 | NonZeroU32::new(uint).expect("integer in a '{}' repetition must be > 0") 189 | } 190 | 191 | /// parse a group ((inner_regex)) 192 | fn parse_group(chars: &mut std::str::Chars) -> Regex { 193 | use RegexSymbol::*; 194 | 195 | let mut group: Vec = Vec::new(); 196 | 197 | while let Some(ch) = chars.next() { 198 | match ch { 199 | '\\' => { 200 | let next = chars.next().unwrap(); 201 | group.push(Regex::new(Literal(next))); 202 | } 203 | '[' => group.push(parse_list(chars)), 204 | '{' => group.last_mut().unwrap().repetitions = 
Some(parse_repetitions(chars)), 205 | ')' => break, 206 | '(' => group.push(parse_group(chars)), 207 | ch => group.push(Regex::new(Literal(ch))), 208 | } 209 | } 210 | 211 | Regex::new(Group(group)) 212 | } 213 | 214 | /// parse the regex 215 | fn parse_regex(mut chars: std::str::Chars) -> Vec { 216 | use RegexSymbol::*; 217 | 218 | let mut tree: Vec = Vec::new(); 219 | 220 | while let Some(ch) = chars.next() { 221 | match ch { 222 | '\\' => { 223 | let next = chars.next().unwrap(); 224 | tree.push(Regex::new(Literal(next))); 225 | } 226 | '[' => tree.push(parse_list(&mut chars)), 227 | '{' => tree.last_mut().unwrap().repetitions = Some(parse_repetitions(&mut chars)), 228 | ')' => panic!("badly formatted regex!"), 229 | '(' => tree.push(parse_group(&mut chars)), 230 | ch => tree.push(Regex::new(Literal(ch))), 231 | } 232 | } 233 | 234 | tree 235 | } 236 | 237 | #[derive(Clone, Copy, ValueEnum)] 238 | /// Distribution to use when randomly generating the characters 239 | enum Distribution { 240 | /// Uniform distribution 241 | Uniform, 242 | /// Normal distribution 243 | Normal, 244 | /// Incremental distribution (VERY SLOW) 245 | /// 246 | /// For example, a regex like [0-9]{3} will produce '001', '002', '003', and so on, in order. 247 | Incremental, 248 | } 249 | 250 | #[derive(Parser)] 251 | #[command(version, name = "keygen")] 252 | /// `keygen` generates random strings based on a regex 253 | /// 254 | /// Note **DO NOT IMPLEMENT** several regexes operations: 255 | /// * the `*` operator 256 | /// * the `+` operator 257 | /// * the `?` operator 258 | /// * the `.` operator 259 | /// 260 | /// Regexes passed to this program may only make use of `()` groups, `[]` lists of characters and 261 | /// `{n}`, where `n` is an integer. For example, to generate 3 digit numbers, you would use: 262 | /// `[0-9]{3}` 263 | struct Command { 264 | /// Regex used to generate the strings 265 | /// 266 | /// Attention! The OR operator (|) is not implemented! 
267 | regex: String, 268 | 269 | /// Number of elements to generate 270 | #[clap(short, long, default_value = "100")] 271 | number: u64, 272 | 273 | /// Seed used for random number generation 274 | #[clap(short, long, default_value = "223554")] 275 | seed: u64, 276 | 277 | /// Distribution used in random generation 278 | #[clap(short, long, default_value = "uniform")] 279 | distribution: Distribution, 280 | } 281 | 282 | fn main() { 283 | let cmd = Command::parse(); 284 | 285 | let mut r = rand::rngs::StdRng::seed_from_u64(cmd.seed); 286 | 287 | let regex = parse_regex(cmd.regex.chars()); 288 | 289 | #[cfg(debug_assertions)] 290 | { 291 | println!("{regex:#?}"); 292 | } 293 | 294 | let stdout = stdout(); 295 | let lock = stdout.lock(); 296 | let mut writer = BufWriter::new(lock); 297 | match cmd.distribution { 298 | Distribution::Uniform => { 299 | let d = Uniform::new(0.0, 1.0); 300 | for _ in 0..cmd.number { 301 | writeln!(writer, "{}", generate(®ex, &mut r, &d)).unwrap(); 302 | } 303 | } 304 | Distribution::Normal => { 305 | let d = Normal::new(0.5, 0.25).unwrap(); 306 | for _ in 0..cmd.number { 307 | writeln!(writer, "{}", generate(®ex, &mut r, &d)).unwrap(); 308 | } 309 | } 310 | Distribution::Incremental => { 311 | for mut i in 0..cmd.number { 312 | writeln!(writer, "{}", generate_inc(®ex, &mut i)).unwrap(); 313 | } 314 | } 315 | } 316 | } 317 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | Sepe Banner
3 |

4 | 5 | ## Introduction 6 | 7 | This project's goal is to devise, implement, and evaluate techniques for generating optimized hash functions tailored for string keys whose format can be approximated by a regular expression inferred through profiling. These functions will be used to improve the performance of C++'s STL data structures, such as `std::unordered_map`, `std::unordered_set`, `std::unordered_multimap` and `std::unordered_multiset`, in addition to any other `std::hash` specialization for user-defined C++ types. 8 | 9 | ### Dependencies 10 | 11 | These are the most important dependencies for building and running all Sepe programs: 12 | 13 | | Dependency | Version | Installation Link | 14 | |------------|-----------|----------------------------------------------| 15 | | clang | >= 14.0.0 | [llvm.org](https://llvm.org/docs/CMake.html) | 16 | | CMake | >= 3.20 | [cmake.org](https://cmake.org/install/) | 17 | | Rust | >= 1.70 | [rust-lang.org](https://www.rust-lang.org/tools/install)| 18 | | Python | >= 3.10 | [python.org](https://wiki.python.org/moin/BeginnersGuide/Download)| 19 | 20 | Rust is only necessary if you want to run the experiments. If you are only interested in hash function generation, only `clang` is necessary. 21 | 22 | ## Quick-Start: Synthesizing functions 23 | 24 | You can follow these two steps to use optimized hash functions generated from this project: 25 | 26 | 1. Obtain your synthesized hash function in one of the two ways: 27 | 1. Using a set of [key examples](#synthesizing-from-key-examples). 28 | 2. Using the [regular expression of the keys](#synthesizing-from-regular-expression). 29 | 2. [Integrate](#integrating-the-synthesized-function-into-your-project) the optimized hash function into your code. 30 | 31 | ### Synthesizing from Key Examples 32 | 33 | To synthesize hash functions from key examples, you only need to create a file containing a non-exhaustive but representative key set.
34 | 35 | Supposing your key strings are saved in the `txt-file-with-strings` file, you can run the following command: 36 | 37 | ```sh 38 | ./bin/keysynth "$(./bin/keybuilder < txt-file-with-strings)" 39 | ``` 40 | 41 | ### Synthesizing from Regular Expression 42 | 43 | To build the hash function from the regular expression of your keys, use: 44 | 45 | ```sh 46 | make 47 | ./scripts/make_hash_from_regex.sh [REGEX] 48 | ``` 49 | 50 | Example: *Generating a custom hash function for IPV4 keys* 51 | ```sh 52 | ./scripts/make_hash_from_regex.sh "(([0-9]{3})\.){3}[0-9]{3}" #or single quotes in zshell 53 | ``` 54 | 55 | See more about regular expressions in the [keygen](#keygen) section. 56 | 57 | ### Integrating the Synthesized function into your project 58 | 59 | Suppose your code has a C++ STL std::unordered_map with IPV4 std::string as keys and int as values. 60 | 61 | ```cpp 62 | void yourCode(void){ 63 | std::unordered_map map; 64 | map["255.255.255.255"] = 42; 65 | // more code that uses map object 66 | } 67 | ``` 68 | 69 | After running, `./scripts/make_hash_from_regex.sh "(([0-9]{3})\.){3}[0-9]{3}"`, you should get the following output with two function options: 70 | 71 | ```cpp 72 | // Helper function, include in your codebase: 73 | inline static uint64_t load_u64_le(const char* b) { 74 | uint64_t Ret; 75 | // This is a way for the compiler to optimize this func to a single movq instruction 76 | memcpy(&Ret, b, sizeof(uint64_t)); 77 | return Ret; 78 | } 79 | // Pext Hash Function: 80 | struct synthesizedPextHash { 81 | // Omitted for brevity in this code snippet 82 | }; 83 | // OffXor Hash Function: 84 | struct synthesizedOffXorHash { 85 | std::size_t operator()(const std::string& key) const { 86 | const std::size_t hashable0 = load_u64_le(key.c_str()+0); 87 | const std::size_t hashable1 = load_u64_le(key.c_str()+7); 88 | size_t tmp0 = hashable0 ^ hashable1; 89 | return tmp0; 90 | } 91 | }; 92 | ``` 93 | 94 | *If in doubt, we always recommend using the 
synthesizedOffXorHash variant, according to our benchmarks.* 95 | Copy and paste the desired hash function, in this example, `synthesizedOffXorHash`, into your codebase and then add its name as the third argument in the std::unordered_map template. 96 | 97 | ```cpp 98 | inline static uint64_t load_u64_le(const char* b) { 99 | uint64_t Ret; 100 | // This is a way for the compiler to optimize this func to a single movq instruction 101 | memcpy(&Ret, b, sizeof(uint64_t)); 102 | return Ret; 103 | } 104 | 105 | struct synthesizedOffXorHash { 106 | std::size_t operator()(const std::string& key) const { 107 | const std::size_t hashable0 = load_u64_le(key.c_str()+0); 108 | const std::size_t hashable1 = load_u64_le(key.c_str()+7); 109 | size_t tmp0 = hashable0 ^ hashable1; 110 | return tmp0; 111 | } 112 | }; 113 | 114 | void yourCode(void){ 115 | std::unordered_map map; 116 | map["255.255.255.255"] = 42; 117 | // more code that uses map object 118 | } 119 | ``` 120 | 121 | ## Quick-Start: Benchmarking 122 | 123 | Building and running with default parameters: 124 | 125 | ```sh 126 | ./scripts/install_abseil.sh # necessary for keyuser 127 | make && make benchmark 128 | ./bin/sepe-runner [REGEXES] 129 | ``` 130 | Valid regexes are listed in the `Regexes.toml` file. 131 | 132 | Example: *Benchmarking all IPV4 hash functions with default parameters* 133 | ``` 134 | ./bin/sepe-runner IPV4 135 | ./scripts/keyuser_interpreter.py -p IPV4_performance.csv 136 | ``` 137 | 138 | For more options, see [sepe-runner](#sepe-runner) section: 139 | 140 | ## Sepe Components 141 | 142 | ### keygen 143 | 144 | `keygen` generates (standard output) n random keys from Regex. 145 | 146 | Not all valid regexes are accepted since we did not implement the `OR` (`|`), `Kleene Star` (`*`), `Plus` (`+`), and `DOT` (`.`) operators. 
147 | 148 | ```sh 149 | ./bin/keygen REGEX [-n number_of_elements] [-s seed] [-d distribution] 150 | ``` 151 | 152 | Example: *Generating 2 random IPV4 keys with seed 223554* 153 | 154 | ```sh 155 | ./bin/keygen "(([0-9]{3})\.){3}[0-9]{3}" -n 2 -s 223554 156 | 313.797.178.390 157 | 445.982.868.308 158 | ``` 159 | 160 | For more options, do: 161 | ```sh 162 | ./bin/keygen --help 163 | ``` 164 | 165 | ### keyuser 166 | 167 | *We recommend using keyuser via [sepe-runner](#sepe-runner)* 168 | 169 | `keyuser` benchmarks custom hash functions with keys received from standard input. 170 | 171 | ```sh 172 | ./bin/keygen REGEX | ./bin/keyuser [hashes] [seed] [verbose] 173 | ``` 174 | 175 | **If no [hashes] are specified, only generic hash functions are executed** 176 | 177 | Example: *Benchmarking 2 IPV4 Keys with 10 total operations using STDHashBin PextIPV4 hash functions. 50% insertions, 30% search, and 20% elimination operations.* 178 | 179 | ```sh 180 | ./bin/keygen "(([0-9]{3})\.){3}[0-9]{3}" -n 2 -s 223554 | ./bin/keyuser --hashes STDHashBin PextIPV4 -n 10 -i 50 -s 30 -e 20 181 | ``` 182 | 183 | For more options, do: 184 | ```sh 185 | ./bin/keyuser --help 186 | ``` 187 | 188 | ### keybuilder 189 | 190 | `keybuilder` creates a regex from a series of strings passed through standard input, separated by a new line. 191 | 192 | ```sh 193 | ./bin/keybuilder < txt-file-with-strings 194 | ``` 195 | 196 | ### keysynth 197 | 198 | `keysynth` synthesizes the hash functions based on the regex generated by the `keybuilder`. It is picky about the regex's format, so it is not recommended to hand-write it. Use `keybuilder` instead. 199 | 200 | ```sh 201 | ./bin/keysynth "$(./bin/keybuilder < txt-file-with-strings)" 202 | ``` 203 | 204 | ### sepe-runner 205 | 206 | `sepe-runner` is a helper program that connects the other programs together as needed. 207 | 208 | `Regexes.toml` is a configuration file containing all accepted `sepe-runner` regular expressions and their associated Hash Functions.
*Changing this file also requires changing `keyuser`.* 209 | 210 | ```sh 211 | ./bin/sepe-runner Regex-entry-in-Regexes.toml 212 | ``` 213 | Some relevant parameters are: 214 | - `-k, --keys`: Number of keys to generate 215 | - `-o, --operations`: Number of operations to run 216 | - `-i, --insert`: Percentage of insertion operations 217 | - `-s, --search`: Percentage of search operations 218 | - `-e, --elimination`: Percentage of elimination operations 219 | - `--histogram`: Generate the distribution histogram for the given regex, do not run experiments 220 | 221 | Example: *Running the IPV4 benchmark* 222 | 223 | ```sh 224 | ./bin/sepe-runner IPV4 225 | ``` 226 | 227 | For more options, do: 228 | ```sh 229 | ./bin/sepe-runner --help 230 | ``` 231 | 232 | ## Helper Scripts 233 | 234 | The `scripts` folder contains some helper scripts that may be useful for some people: 235 | 236 | * `align_csv.sh` - pretty prints `keyuser`'s generated `.csv` files for easier analysis 237 | * `benchmark.sh` - helper to run many benchmarks at once 238 | * `install_abseil.sh` - installs the abseil library locally. Necessary for `keyuser` 239 | * `make_hash_from_regex.sh` - creates a hash function from a user-defined regex 240 | * `keyuser_interpreter.py` - interprets the results generated from `keyuser`'s benchmarks 241 | 242 | ### Using `keyuser_interpreter.py` 243 | 244 | This script is used to help interpret the output of `keyuser`. It can plot graphs, generate tables, and perform statistical analysis. 245 | 246 | The most relevant configurations are: 247 | 248 | ``` 249 | -d DISTRIBUTION, --distribution DISTRIBUTION 250 | Name of the distribution file to interpret. Exclusive with -p option. 251 | -p [PERFORMANCE ...], --performance [PERFORMANCE ...] 252 | Name of the CSV performance files to interpret. Exclusive with -d option. 253 | -pg, --plot-graph Option to plot the results in graphs. 254 | -hf [HASH_FUNCTIONS ...], --hash-functions [HASH_FUNCTIONS ...] 
255 | Name of the hash functions to analyze. 256 | ``` 257 | 258 | Example for interpreting performance using IPV4 keys: 259 | ```sh 260 | ./bin/sepe-runner IPV4 && ./scripts/keyuser_interpreter.py -p IPV4_performance.csv 261 | ``` 262 | 263 | Example for interpreting hash distribution using IPV4 keys: 264 | ```sh 265 | ./bin/sepe-runner --histogram IPV4 && ./scripts/keyuser_interpreter.py -d IPV4_distribution.py 266 | ``` 267 | 268 | ### Reproducing Experiments 269 | 270 | The [artifact branch](https://github.com/lac-dcc/sepe/tree/artifact) reproduces the research questions from the paper. 271 | All scripts to reproduce the RQs are available in a Docker container. 272 | RQ1 and RQ2 can be reproduced with a single script `rq1_rq2_benchmark.sh`. 273 | All other RQs have an individual script `rq_benchmark.sh`. 274 | 275 | -------------------------------------------------------------------------------- /src/keyuser/src/benchmarks.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file benchmarks.cpp 3 | * @brief Implementation of benchmarking functions. 4 | * 5 | * This file includes the implementation of functions for executing benchmarks. 6 | * The benchmarks are performed on a set of keys and according to a set of parameters.
7 | */ 8 | 9 | #include "benchmarks.hpp" 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | void executeInterweaved(Benchmark* bench, 16 | const std::vector& keys, 17 | const BenchmarkParameters& args) 18 | { 19 | // Seed random number generator 20 | srand(args.seed); 21 | 22 | // Interweaved execution mode parameters 23 | int numInsert = (args.insert * args.numOperations) / 100; 24 | 25 | 26 | if ( args.distribution == "normal" ) { 27 | // Create a binomial distribution with parameters n=keys.size() and p=0.5. NOTE: the engine below is default-constructed, so args.seed only affects the rand()-based operation choice, not which keys are drawn in this branch 28 | std::default_random_engine generator; 29 | std::binomial_distribution distribution(keys.size(), 0.5); 30 | 31 | // First, insert 50% of the numInserts 32 | for(int j = 0; j < numInsert/2; j++){ 33 | int randomKey = distribution(generator) % keys.size(); 34 | bench->insert(keys[randomKey]); 35 | } 36 | for(int j = 0; j < (args.numOperations-(numInsert/2)); j++){ 37 | int randomKey = distribution(generator) % keys.size(); 38 | int randomOp = rand() % 100; 39 | if(randomOp < args.insert){ 40 | bench->insert(keys[randomKey]); 41 | }else if(randomOp < args.insert + args.search){ 42 | bench->search(keys[randomKey]); 43 | }else{ 44 | bench->elimination(keys[randomKey]); 45 | } 46 | } 47 | } else if ( args.distribution == "uniform" ) { 48 | // First, insert 50% of the numInserts 49 | for(int j = 0; j < numInsert/2; j++){ 50 | int randomKey = rand() % keys.size(); 51 | bench->insert(keys[randomKey]); 52 | } 53 | for(int j = 0; j < (args.numOperations-(numInsert/2)); j++){ 54 | int randomKey = rand() % keys.size(); 55 | int randomOp = rand() % 100; 56 | if(randomOp < args.insert){ 57 | bench->insert(keys[randomKey]); 58 | }else if(randomOp < args.insert + args.search){ 59 | bench->search(keys[randomKey]); 60 | }else{ 61 | bench->elimination(keys[randomKey]); 62 | } 63 | } 64 | } 65 | 66 | } 67 | 68 | void executeBatched(Benchmark* bench, 69 | const std::vector& keys, 70 | const BenchmarkParameters& args) 71 | { 72 | // Seed random number generator 73 |
srand(args.seed); 74 | 75 | // Batch execution mode parameters 76 | int numInsert = (args.insert * args.numOperations) / 100; 77 | int numSearch = (args.search * args.numOperations) / 100; 78 | int numElimination = (args.elimination * args.numOperations) / 100; 79 | 80 | if ( args.distribution == "normal" ) { 81 | // Create a binomial distribution with parameters n=keys.size() and p=0.5. NOTE: the engine below is default-constructed and rand() is not used in this branch, so args.seed has no effect on which keys are drawn here 82 | std::default_random_engine generator; 83 | std::binomial_distribution distribution(keys.size(), 0.5); 84 | for(int j = 0; j < numInsert; j++){ 85 | int randomKey = distribution(generator) % keys.size(); 86 | bench->insert(keys[randomKey]); 87 | } 88 | for(int j = 0; j < numSearch; j++){ 89 | int randomKey = distribution(generator) % keys.size(); 90 | bench->search(keys[randomKey]); 91 | } 92 | for(int j = 0; j < numElimination; j++){ 93 | int randomKey = distribution(generator) % keys.size(); 94 | bench->elimination(keys[randomKey]); 95 | } 96 | } else if ( args.distribution == "uniform" ) { 97 | for(int j = 0; j < numInsert; j++){ 98 | int randomKey = rand() % keys.size(); 99 | bench->insert(keys[randomKey]); 100 | } 101 | for(int j = 0; j < numSearch; j++){ 102 | int randomKey = rand() % keys.size(); 103 | bench->search(keys[randomKey]); 104 | } 105 | for(int j = 0; j < numElimination; j++){ 106 | int randomKey = rand() % keys.size(); 107 | bench->elimination(keys[randomKey]); 108 | } 109 | } 110 | } 111 | 112 | void printVerbose(Benchmark& bench, const std::chrono::duration& elapsed_seconds){ 113 | printf( "\t\t%-25s %25s Elapsed time: %f (s) " 114 | "Collision Count (Buckets): %d\n", 115 | bench.getContainerName().c_str(), 116 | bench.getHashName().c_str(), 117 | elapsed_seconds.count(), 118 | bench.calculateCollisionCountBuckets()); 119 | } 120 | 121 | static void reportMetricsCSV( 122 | const char* execMode, 123 | const char* argsString, 124 | const char* containerName, 125 | const char* hashFuncName, 126 | const float execTime, 127 | const int collisions) 128 | { 129 | printf(
"%s,%s,%s,%s,%f,%d\n", 130 | execMode, 131 | argsString, 132 | containerName, 133 | hashFuncName, 134 | execTime, 135 | collisions); 136 | } 137 | 138 | void benchmarkExecutor(const std::vector& benchmarks, 139 | const std::vector& keys, 140 | const BenchmarkParameters& args) 141 | { 142 | 143 | // Init CSV File 144 | printf("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n", 145 | "Execution Mode", 146 | "Key Distribution", 147 | "Num Operations", 148 | "Num Keys", 149 | "Insertions (%)", 150 | "Searches (%)", 151 | "Eliminatons(%)", 152 | "Hash Container", 153 | "Hash Function", 154 | "Execution Time (s)", 155 | "Collision Count"); 156 | 157 | char* argsString = (char*)malloc(sizeof(char)*100); 158 | sprintf(argsString, "%s,%d,%ld,%d,%d,%d", 159 | args.distribution.c_str(), 160 | args.numOperations, 161 | keys.size(), 162 | args.insert, 163 | args.search, 164 | args.elimination); 165 | 166 | // Execution modes are hard coded since we do not expect to add new ones or modify existing ones 167 | for(int r=0; r < args.repetitions; ++r){ 168 | for (const auto& bench : benchmarks){ 169 | 170 | // Execute benchmark 171 | auto start = std::chrono::system_clock::now(); 172 | executeInterweaved(bench, keys, args); 173 | auto end = std::chrono::system_clock::now(); 174 | std::chrono::duration elapsed_seconds = end-start; 175 | 176 | reportMetricsCSV("Interweaved", 177 | argsString, 178 | bench->getContainerName().c_str(), 179 | bench->getHashName().c_str(), 180 | elapsed_seconds.count(), 181 | bench->calculateCollisionCountBuckets()); 182 | 183 | bench->clearContainer(); 184 | 185 | } 186 | } 187 | 188 | for(int r=0; r < args.repetitions; ++r){ 189 | for (const auto& bench : benchmarks){ 190 | 191 | // Execute benchmark 192 | auto start = std::chrono::system_clock::now(); 193 | executeBatched(bench, keys, args); 194 | auto end = std::chrono::system_clock::now(); 195 | std::chrono::duration elapsed_seconds = end-start; 196 | 197 | reportMetricsCSV("Batched", 198 | argsString, 199 | 
bench->getContainerName().c_str(), 200 | bench->getHashName().c_str(), 201 | elapsed_seconds.count(), 202 | bench->calculateCollisionCountBuckets()); 203 | 204 | bench->clearContainer(); 205 | 206 | } 207 | } 208 | 209 | free(argsString); 210 | 211 | } 212 | 213 | void testDistribution(const std::vector& benchmarks, 214 | const std::vector& keys){ 215 | 216 | std::unordered_set hashFuncExecuted; 217 | std::unordered_map hashFuncCollisions; 218 | printf("import numpy as np\n"); 219 | printf("distributions = {}\n"); 220 | for (const auto& bench : benchmarks){ 221 | if(hashFuncExecuted.find(bench->getHashName()) != hashFuncExecuted.end()){ 222 | continue; 223 | } 224 | 225 | hashFuncExecuted.insert(bench->getHashName()); 226 | 227 | std::vector buckets; 228 | auto hashFunc = bench->getHashFunction(); 229 | 230 | for(const auto& key : keys){ 231 | size_t hashID = hashFunc(key); 232 | buckets.push_back(hashID); 233 | } 234 | 235 | std::sort(buckets.begin(), buckets.end()); 236 | printf("distributions['array_%s'] = np.array([", bench->getHashName().c_str()); 237 | for (size_t i = 0; i < buckets.size(); ++i) { 238 | printf("%lu,",buckets[i]); 239 | } 240 | printf("])\n"); 241 | // Count number of replicated values in the buckets 242 | auto newEnd = std::unique(buckets.begin(), buckets.end()); 243 | size_t numRepeated = buckets.end() - newEnd; 244 | hashFuncCollisions[bench->getHashName()] = numRepeated; 245 | } 246 | 247 | for ( const auto & [hashName, collisions] : hashFuncCollisions ) { 248 | printf("# Hash Function: %s, Collisions: %lu\n", hashName.c_str(), collisions); 249 | } 250 | } 251 | 252 | void testHashPerformance(const std::vector& benchmarks, 253 | const std::vector& keys, 254 | const BenchmarkParameters& args){ 255 | 256 | std::unordered_set hashFuncExecuted; 257 | printf("Hash Function,Elapsed Time (seconds)\n"); 258 | for (const auto& bench : benchmarks){ 259 | if(hashFuncExecuted.find(bench->getHashName()) != hashFuncExecuted.end()){ 260 | continue; 261 | 
} 262 | 263 | for(int r=0; r < args.repetitions; ++r){ 264 | hashFuncExecuted.insert(bench->getHashName()); 265 | auto hashFunc = bench->getHashFunction(); 266 | 267 | auto start = std::chrono::system_clock::now(); 268 | for(const auto& key : keys){ 269 | size_t hashID = hashFunc(key); 270 | } 271 | auto end = std::chrono::system_clock::now(); 272 | std::chrono::duration elapsed_seconds = end-start; 273 | printf("%s,%f\n", bench->getHashName().c_str(), elapsed_seconds.count()); 274 | } 275 | } 276 | } 277 | 278 | void freeBenchmarks(std::vector& benchmarks){ 279 | for(auto bench : benchmarks){ 280 | delete bench; 281 | } 282 | } 283 | -------------------------------------------------------------------------------- /src/sepe-runner/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::{fs, io::Write, os::unix::ffi::OsStringExt, str::FromStr}; 2 | 3 | use clap::{Parser, ValueEnum}; 4 | use toml::Table; 5 | 6 | const TOML_FILENAME: &str = "Regexes.toml"; 7 | const KEYGEN: &str = "keygen"; 8 | const KEYUSER: &str = "keyuser"; 9 | const KEYUSER_DEBUG: &str = "keyuser-debug"; 10 | const KEYBUILDER: &str = "keybuilder"; 11 | 12 | #[derive(Debug, Clone, Copy, ValueEnum)] 13 | /// Distribution to use when randomly generating the characters 14 | enum Distribution { 15 | /// Uniform distribution 16 | Uniform, 17 | /// Normal distribution 18 | Normal, 19 | /// Incremental distribution (VERY SLOW) 20 | /// 21 | /// For example, a regex like [0-9]{3} will produce '001', '002', '003', and so on, in order. 
Incremental,
}

impl Distribution {
    /// Name of the distribution as expected by the `-d`/`--distribution`
    /// flags of keygen and keyuser.
    fn as_str(&self) -> &'static str {
        match self {
            Distribution::Uniform => "uniform",
            Distribution::Normal => "normal",
            Distribution::Incremental => "incremental",
        }
    }
}

/// Helper program that stitches together the other programs in this repository
///
/// It is meant to facilitate testing and benchmarking during development and research.
///
/// **End users who only want to synthesize some functions do not have to interact with this**.
/// They should just use keybuilder and keysynth directly, instead
#[derive(Parser, Debug)]
#[command(author, version, name = "sepe-runner")]
struct Command {
    /// Number of keys to generate
    ///
    /// This is set to 10000 when `--histogram` is set or if distribution is 'incremental'
    #[clap(
        short,
        long,
        default_value = "1000000",
        default_value_if("histogram", "true", "10000"),
        default_value_if("distribution", "incremental", "10000")
    )]
    keys: u64,

    /// Distribution used in random generation
    #[clap(short, long, default_value = "uniform")]
    distribution: Distribution,

    /// Key generator random number generator seed
    #[clap(long, default_value = "223554")]
    keygen_seed: u64,

    /// Whether to run keyuser in debug mode
    #[clap(long, default_value = "false")]
    debug: bool,

    /// Key user random number generator seed
    #[clap(long, default_value = "9764096")]
    keyuser_seed: u64,

    /// Number of repetitions to forward to keyuser
    #[clap(short, long, default_value = "1")]
    repetitions: u64,

    /// Number of operations to run
    #[clap(short, long, default_value = "10000")]
    operations: u64,

    /// Percentage of insertion operations. **Must be in [0; 1] range**
    #[clap(short, long, default_value = "0.5")]
    insert: f64,

    /// Percentage of search operations. **Must be in [0; 1] range**
    #[clap(short, long, default_value = "0.3")]
    search: f64,

    /// Percentage of elimination operations. **Must be in [0; 1] range**
    #[clap(short, long, default_value = "0.2")]
    elimination: f64,

    /// Prints configuration and runs keyuser in verbose mode
    #[clap(short, long, default_value = "false")]
    verbose: bool,

    /// Suffix of output csv files.
    ///
    /// The prefix is the Regex's entry name in the Regexes.toml file.
    /// When `--histogram` is set, the default value is '_distribution.py'
    #[clap(
        long,
        default_value = "_performance.csv",
        default_value_if("histogram", "true", "_distribution.py")
    )]
    outfile: String,

    /// Generate the synthesized function for the given regex, do not run experiments
    #[clap(long)]
    synthesize: bool,

    /// Generate the distribution histogram for the given regex, do not run experiments
    #[clap(long)]
    histogram: bool,

    /// Measure the performance of the hash functions without containers
    #[clap(long)]
    hash_performance: bool,

    /// Whether to generate the keys incrementally, rather than randomly (VERY SLOW)
    #[clap(long)]
    incremental_generation: bool,

    /// Regexes we will benchmark, defined in Regexes.toml
    ///
    /// You can send multiple Regexes, separated by spaces
    // NOTE(review): the element type was stripped from this dump; `String` is
    // reconstructed from the `.to_string()` / `contains(&"ALL".to_string())` uses in main.
    regexes: Vec<String>,
}

/// Searches for a directory entry named `filename`, starting in the directory
/// that holds the running executable and walking up through its ancestors.
///
/// Panics when a directory cannot be read or when the filesystem root is
/// reached without finding the entry.
fn find_file(filename: &str) -> fs::DirEntry {
    let mut cwd = std::env::current_exe().expect("couldn't find out current executable path");
    cwd.pop();
    loop {
        let entries = cwd
            .read_dir()
            .unwrap_or_else(|e| panic!("failed to read directory {:#?}, {e}", cwd));

        match entries
            .flatten()
            .find(|entry| entry.file_name().eq(filename))
        {
            Some(entry) => return entry,
            None => {
                cwd = cwd
                    .parent()
                    .unwrap_or_else(|| panic!("failed to find {} file", filename))
                    .into()
            }
        }
    }
}

/// Locates `Regexes.toml` with `find_file` and parses it into a TOML table.
///
/// Panics when the file cannot be read, is not valid UTF-8 or is not valid TOML.
fn read_toml_file() -> Table {
    let entry = find_file(TOML_FILENAME);
    let file_bytes = fs::read(entry.path())
        .unwrap_or_else(|e| panic!("failed to read {:#?} file: {e}", entry.file_name()));
    let file_content = std::str::from_utf8(&file_bytes)
        .unwrap_or_else(|e| panic!("failed to decode {:#?} file's utf8: {e}", entry.file_name()));

    Table::from_str(file_content).expect("failed to parse toml file's content")
}

/// Converts a fraction (e.g. `0.29`) into a whole percentage (`29`).
///
/// Rounds to nearest: a plain `as u64` cast truncates, so values such as
/// `0.29 * 100.0 == 28.999999999999996` would otherwise become 28.
fn to_percent(fraction: f64) -> u64 {
    (fraction * 100.0).round() as u64
}

/// For every requested regex entry: spawns keygen and pipes its keys either
/// into keybuilder + keysynth (`--synthesize`) or into keyuser, whose stdout
/// is written to `<entry name><outfile suffix>`.
fn main() {
    let mut cmd = Command::parse();

    let regex_table = read_toml_file();

    // "ALL" expands to every entry of the TOML file
    if cmd.regexes.iter().any(|regex| regex == "ALL") {
        cmd.regexes = regex_table.keys().map(|key| key.to_string()).collect();
    }

    for cmd_regex in cmd.regexes {
        let values = match regex_table.get(&cmd_regex) {
            Some(r) => r,
            None => {
                eprintln!("there is no {} entry in {}", cmd_regex, TOML_FILENAME);
                continue;
            }
        };

        let regex = values
            .get("regex")
            .unwrap_or_else(|| panic!("{} entry has no 'regex' value!", cmd_regex))
            .as_str()
            .unwrap_or_else(|| panic!("{} regex entry must be a string!", cmd_regex));

        let hashes = values
            .get("hashes")
            .unwrap_or_else(|| panic!("{} entry has no 'hashes' list!", cmd_regex))
            .as_array()
            .unwrap_or_else(|| panic!("{} hashes entry must be a list of strings!", cmd_regex));

        let keygen = find_file(KEYGEN);
        let keyuser = find_file(if cmd.debug { KEYUSER_DEBUG } else { KEYUSER });

        use std::process::Command as Cmd;

        let keygen_cmd = Cmd::new(keygen.path())
            .stdout(std::process::Stdio::piped())
            .arg(regex)
            .arg("-n")
            .arg(format!("{}", cmd.keys))
            .arg("-s")
            .arg(format!("{}", cmd.keygen_seed))
            .arg("-d")
            .arg(cmd.distribution.as_str())
            .spawn()
            .expect("failed to spawn keygen command");

        let keygen_out = keygen_cmd.stdout.expect("failed to open keygen stdout");

        if cmd.synthesize {
            let mut keybuilder_output = Cmd::new(find_file(KEYBUILDER).path())
                .stdin(std::process::Stdio::from(keygen_out))
                .output()
                .expect("failed to spawn keybuilder!");

            // drop the last byte of keybuilder's output before splitting it into arguments
            keybuilder_output.stdout.pop();

            let args = std::ffi::OsString::from_vec(keybuilder_output.stdout);
            let args = args.to_string_lossy();
            let args: Box<[String]> = args.split_whitespace().map(|e| e.to_string()).collect();
            match Cmd::new(find_file("keysynth").path())
                .args(args.iter())
                .spawn()
                .expect("failed to spawn keysynth!")
                .wait()
            {
                Ok(exit_status) => {
                    if !exit_status.success() {
                        eprintln!("ERROR: keysynth failed!");
                    }
                }
                Err(e) => {
                    eprintln!("ERROR: keysynth couldn't run to completion: {e}!");
                }
            }

            continue;
        }

        let mut keyuser_cmd = Cmd::new(keyuser.path());

        keyuser_cmd
            .stdin(std::process::Stdio::from(keygen_out))
            .arg("-i")
            .arg(format!("{}", to_percent(cmd.insert)))
            .arg("-s")
            .arg(format!("{}", to_percent(cmd.search)))
            .arg("-e")
            .arg(format!("{}", to_percent(cmd.elimination)))
            .arg("-n")
            .arg(format!("{}", cmd.operations))
            .arg("-seed")
            .arg(format!("{}", cmd.keyuser_seed))
            .arg("-r")
            .arg(format!("{}", cmd.repetitions))
            .arg("--distribution")
            .arg(cmd.distribution.as_str());

        if cmd.verbose {
            keyuser_cmd.arg("--verbose");
        }

        if cmd.histogram {
            keyuser_cmd.arg("--test-distribution");
        }

        if cmd.hash_performance {
            keyuser_cmd.arg("--hash-performance");
        }

        keyuser_cmd.arg("--hashes");
        for hash in hashes {
            keyuser_cmd.arg(hash.as_str().unwrap_or_else(|| {
                panic!("{} hashes entry must be a list of strings!", cmd_regex)
            }));
        }

        println!("\nExecuting {} regex: {}", cmd_regex, regex);
        if cmd.verbose {
            println!("    Configuration:");
            println!("        Debug: {}", cmd.debug);
            println!("        Keys Generated: {}", cmd.keys);
            println!("        Number of Operations: {}", cmd.operations);
            println!(
                "        [Insertion, Search, Elimination ] Percentages: [{}, {}, {}]",
                cmd.insert, cmd.search, cmd.elimination
            );
        }

        let keyuser_out = keyuser_cmd
            .output()
            .expect("failed to spawn keyuser command");

        if !keyuser_out.status.success() {
            eprintln!("    !!!FAILED: {}!!!", keyuser_out.status);
        } else {
            let filename = cmd_regex + &cmd.outfile;
            let mut outfile =
                std::fs::File::create(filename).expect("failed to create output file!");
            outfile.write_all(&keyuser_out.stdout).unwrap();
        }
        // keyuser's diagnostics are forwarded whether or not it succeeded
        std::io::stderr().write_all(&keyuser_out.stderr).unwrap();
    }
}
--------------------------------------------------------------------------------
/src/keyuser/src/google-hashes/farm.hpp:
--------------------------------------------------------------------------------
// Copyright (c) 2014 Google, Inc.
2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | // 21 | // FarmHash, by Geoff Pike 22 | 23 | // 24 | // http://code.google.com/p/farmhash/ 25 | // 26 | // This file provides a few functions for hashing strings and other 27 | // data. All of them are high-quality functions in the sense that 28 | // they do well on standard tests such as Austin Appleby's SMHasher. 29 | // They're also fast. FarmHash is the successor to CityHash. 30 | // 31 | // Functions in the FarmHash family are not suitable for cryptography. 32 | // 33 | // WARNING: This code has been only lightly tested on big-endian platforms! 34 | // It is known to work well on little-endian platforms that have a small penalty 35 | // for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs. 
36 | // It should work on all 32-bit and 64-bit platforms that allow unaligned reads; 37 | // bug reports are welcome. 38 | // 39 | // By the way, for some hash functions, given strings a and b, the hash 40 | // of a+b is easily derived from the hashes of a and b. This property 41 | // doesn't hold for any hash functions in this file. 42 | 43 | #ifndef FARM_HASH_H_ 44 | #define FARM_HASH_H_ 45 | 46 | #include 47 | #include 48 | #include 49 | #include // for memcpy and memset 50 | #include 51 | 52 | #ifndef NAMESPACE_FOR_HASH_FUNCTIONS 53 | #define NAMESPACE_FOR_HASH_FUNCTIONS util 54 | #endif 55 | 56 | namespace NAMESPACE_FOR_HASH_FUNCTIONS { 57 | 58 | #if defined(FARMHASH_UINT128_T_DEFINED) 59 | #if defined(__clang__) 60 | #if !defined(uint128_t) 61 | #define uint128_t __uint128_t 62 | #endif 63 | #endif 64 | inline uint64_t Uint128Low64(const uint128_t x) { 65 | return static_cast(x); 66 | } 67 | inline uint64_t Uint128High64(const uint128_t x) { 68 | return static_cast(x >> 64); 69 | } 70 | inline uint128_t Uint128(uint64_t lo, uint64_t hi) { 71 | return lo + (((uint128_t)hi) << 64); 72 | } 73 | #else 74 | typedef std::pair uint128_t; 75 | inline uint64_t Uint128Low64(const uint128_t x) { return x.first; } 76 | inline uint64_t Uint128High64(const uint128_t x) { return x.second; } 77 | inline uint128_t Uint128(uint64_t lo, uint64_t hi) { return uint128_t(lo, hi); } 78 | #endif 79 | 80 | 81 | // BASIC STRING HASHING 82 | 83 | // Hash function for a byte array. 84 | // May change from time to time, may differ on different platforms, may differ 85 | // depending on NDEBUG. 86 | size_t Hash(const char* s, size_t len); 87 | 88 | // Hash function for a byte array. Most useful in 32-bit binaries. 89 | // May change from time to time, may differ on different platforms, may differ 90 | // depending on NDEBUG. 91 | uint32_t Hash32(const char* s, size_t len); 92 | 93 | // Hash function for a byte array. For convenience, a 32-bit seed is also 94 | // hashed into the result. 
95 | // May change from time to time, may differ on different platforms, may differ 96 | // depending on NDEBUG. 97 | uint32_t Hash32WithSeed(const char* s, size_t len, uint32_t seed); 98 | 99 | // Hash function for a byte array. 100 | // May change from time to time, may differ on different platforms, may differ 101 | // depending on NDEBUG. 102 | uint64_t Hash64(const char* s, size_t len); 103 | 104 | // Hash function for a byte array. For convenience, a 64-bit seed is also 105 | // hashed into the result. 106 | // May change from time to time, may differ on different platforms, may differ 107 | // depending on NDEBUG. 108 | uint64_t Hash64WithSeed(const char* s, size_t len, uint64_t seed); 109 | 110 | // Hash function for a byte array. For convenience, two seeds are also 111 | // hashed into the result. 112 | // May change from time to time, may differ on different platforms, may differ 113 | // depending on NDEBUG. 114 | uint64_t Hash64WithSeeds(const char* s, size_t len, 115 | uint64_t seed0, uint64_t seed1); 116 | 117 | // Hash function for a byte array. 118 | // May change from time to time, may differ on different platforms, may differ 119 | // depending on NDEBUG. 120 | uint128_t Hash128(const char* s, size_t len); 121 | 122 | // Hash function for a byte array. For convenience, a 128-bit seed is also 123 | // hashed into the result. 124 | // May change from time to time, may differ on different platforms, may differ 125 | // depending on NDEBUG. 126 | uint128_t Hash128WithSeed(const char* s, size_t len, uint128_t seed); 127 | 128 | // BASIC NON-STRING HASHING 129 | 130 | // Hash 128 input bits down to 64 bits of output. 131 | // This is intended to be a reasonably good hash function. 132 | // May change from time to time, may differ on different platforms, may differ 133 | // depending on NDEBUG. 134 | inline uint64_t Hash128to64(uint128_t x) { 135 | // Murmur-inspired hashing. 
136 | const uint64_t kMul = 0x9ddfea08eb382d69ULL; 137 | uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; 138 | a ^= (a >> 47); 139 | uint64_t b = (Uint128High64(x) ^ a) * kMul; 140 | b ^= (b >> 47); 141 | b *= kMul; 142 | return b; 143 | } 144 | 145 | // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions) 146 | 147 | // Fingerprint function for a byte array. Most useful in 32-bit binaries. 148 | uint32_t Fingerprint32(const char* s, size_t len); 149 | 150 | // Fingerprint function for a byte array. 151 | uint64_t Fingerprint64(const char* s, size_t len); 152 | 153 | // Fingerprint function for a byte array. 154 | uint128_t Fingerprint128(const char* s, size_t len); 155 | 156 | // This is intended to be a good fingerprinting primitive. 157 | // See below for more overloads. 158 | inline uint64_t Fingerprint(uint128_t x) { 159 | // Murmur-inspired hashing. 160 | const uint64_t kMul = 0x9ddfea08eb382d69ULL; 161 | uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; 162 | a ^= (a >> 47); 163 | uint64_t b = (Uint128High64(x) ^ a) * kMul; 164 | b ^= (b >> 44); 165 | b *= kMul; 166 | b ^= (b >> 41); 167 | b *= kMul; 168 | return b; 169 | } 170 | 171 | // This is intended to be a good fingerprinting primitive. 172 | inline uint64_t Fingerprint(uint64_t x) { 173 | // Murmur-inspired hashing. 174 | const uint64_t kMul = 0x9ddfea08eb382d69ULL; 175 | uint64_t b = x * kMul; 176 | b ^= (b >> 44); 177 | b *= kMul; 178 | b ^= (b >> 41); 179 | b *= kMul; 180 | return b; 181 | } 182 | 183 | #ifndef FARMHASH_NO_CXX_STRING 184 | 185 | // Convenience functions to hash or fingerprint C++ strings. 186 | // These require that Str::data() return a pointer to the first char 187 | // (as a const char*) and that Str::length() return the string's length; 188 | // they work with std::string, for example. 189 | 190 | // Hash function for a byte array. 191 | // May change from time to time, may differ on different platforms, may differ 192 | // depending on NDEBUG. 
193 | template 194 | inline size_t Hash(const Str& s) { 195 | assert(sizeof(s[0]) == 1); 196 | return Hash(s.data(), s.length()); 197 | } 198 | 199 | // Hash function for a byte array. Most useful in 32-bit binaries. 200 | // May change from time to time, may differ on different platforms, may differ 201 | // depending on NDEBUG. 202 | template 203 | inline uint32_t Hash32(const Str& s) { 204 | assert(sizeof(s[0]) == 1); 205 | return Hash32(s.data(), s.length()); 206 | } 207 | 208 | // Hash function for a byte array. For convenience, a 32-bit seed is also 209 | // hashed into the result. 210 | // May change from time to time, may differ on different platforms, may differ 211 | // depending on NDEBUG. 212 | template 213 | inline uint32_t Hash32WithSeed(const Str& s, uint32_t seed) { 214 | assert(sizeof(s[0]) == 1); 215 | return Hash32WithSeed(s.data(), s.length(), seed); 216 | } 217 | 218 | // Hash 128 input bits down to 64 bits of output. 219 | // Hash function for a byte array. 220 | // May change from time to time, may differ on different platforms, may differ 221 | // depending on NDEBUG. 222 | template 223 | inline uint64_t Hash64(const Str& s) { 224 | assert(sizeof(s[0]) == 1); 225 | return Hash64(s.data(), s.length()); 226 | } 227 | 228 | // Hash function for a byte array. For convenience, a 64-bit seed is also 229 | // hashed into the result. 230 | // May change from time to time, may differ on different platforms, may differ 231 | // depending on NDEBUG. 232 | template 233 | inline uint64_t Hash64WithSeed(const Str& s, uint64_t seed) { 234 | assert(sizeof(s[0]) == 1); 235 | return Hash64WithSeed(s.data(), s.length(), seed); 236 | } 237 | 238 | // Hash function for a byte array. For convenience, two seeds are also 239 | // hashed into the result. 240 | // May change from time to time, may differ on different platforms, may differ 241 | // depending on NDEBUG. 
242 | template 243 | inline uint64_t Hash64WithSeeds(const Str& s, uint64_t seed0, uint64_t seed1) { 244 | assert(sizeof(s[0]) == 1); 245 | return Hash64WithSeeds(s.data(), s.length(), seed0, seed1); 246 | } 247 | 248 | // Hash function for a byte array. 249 | // May change from time to time, may differ on different platforms, may differ 250 | // depending on NDEBUG. 251 | template 252 | inline uint128_t Hash128(const Str& s) { 253 | assert(sizeof(s[0]) == 1); 254 | return Hash128(s.data(), s.length()); 255 | } 256 | 257 | // Hash function for a byte array. For convenience, a 128-bit seed is also 258 | // hashed into the result. 259 | // May change from time to time, may differ on different platforms, may differ 260 | // depending on NDEBUG. 261 | template 262 | inline uint128_t Hash128WithSeed(const Str& s, uint128_t seed) { 263 | assert(sizeof(s[0]) == 1); 264 | return Hash128(s.data(), s.length(), seed); 265 | } 266 | 267 | // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions) 268 | 269 | // Fingerprint function for a byte array. Most useful in 32-bit binaries. 270 | template 271 | inline uint32_t Fingerprint32(const Str& s) { 272 | assert(sizeof(s[0]) == 1); 273 | return Fingerprint32(s.data(), s.length()); 274 | } 275 | 276 | // Fingerprint 128 input bits down to 64 bits of output. 277 | // Fingerprint function for a byte array. 278 | template 279 | inline uint64_t Fingerprint64(const Str& s) { 280 | assert(sizeof(s[0]) == 1); 281 | return Fingerprint64(s.data(), s.length()); 282 | } 283 | 284 | // Fingerprint function for a byte array. 
285 | template 286 | inline uint128_t Fingerprint128(const Str& s) { 287 | assert(sizeof(s[0]) == 1); 288 | return Fingerprint128(s.data(), s.length()); 289 | } 290 | 291 | #endif 292 | 293 | } // namespace NAMESPACE_FOR_HASH_FUNCTIONS 294 | 295 | /* gently define FARMHASH_BIG_ENDIAN when detected big-endian machine */ 296 | #if defined(__BIG_ENDIAN__) 297 | #if !defined(FARMHASH_BIG_ENDIAN) 298 | #define FARMHASH_BIG_ENDIAN 299 | #endif 300 | #elif defined(__LITTLE_ENDIAN__) 301 | // nothing for little-endian 302 | #elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER == __ORDER_LITTLE_ENDIAN__) 303 | // nothing for little-endian 304 | #elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER == __ORDER_BIG_ENDIAN__) 305 | #if !defined(FARMHASH_BIG_ENDIAN) 306 | #define FARMHASH_BIG_ENDIAN 307 | #endif 308 | #elif defined(__linux__) || defined(__CYGWIN__) || defined( __GNUC__ ) && !defined(_WIN32) || defined( __GNU_LIBRARY__ ) 309 | #include // libc6-dev, GLIBC 310 | #if BYTE_ORDER == BIG_ENDIAN 311 | #if !defined(FARMHASH_BIG_ENDIAN) 312 | #define FARMHASH_BIG_ENDIAN 313 | #endif 314 | #endif 315 | #elif defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__s390x__) 316 | #include 317 | #if BYTE_ORDER == BIG_ENDIAN 318 | #if !defined(FARMHASH_BIG_ENDIAN) 319 | #define FARMHASH_BIG_ENDIAN 320 | #endif 321 | #endif 322 | #elif defined(_WIN32) 323 | // Windows is (currently) little-endian 324 | #else 325 | #error "Unable to determine endianness!" 
326 | #endif /* __BIG_ENDIAN__ */ 327 | 328 | #endif // FARM_HASH_H_ 329 | -------------------------------------------------------------------------------- /scripts/global_keyuser_interpreter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This script reads multiple .csv files produced by keyuser, extracts relevant 4 | # information and calculate metrics to compare the different hash functions 5 | 6 | import os 7 | import argparse 8 | import importlib 9 | import re 10 | 11 | # Data science boys 12 | import numpy as np 13 | import pandas as pd 14 | import matplotlib.pyplot as plt 15 | from scipy import stats 16 | 17 | def distribution_test(key, value): 18 | 19 | n = len(value) 20 | chi_worst_case = ((n-1)**2) + (n-1) 21 | 22 | # Calculate the observed frequencies of each value 23 | observed_freq, _ = np.histogram(value, bins=len(np.unique(value))) 24 | 25 | # Calculate the expected frequencies for a uniform distribution 26 | expected_freq = np.full_like(observed_freq, len(value) / len(np.unique(value))) 27 | 28 | # Perform the Chi-Square Goodness of Fit Test 29 | try: 30 | chi2, p = stats.chisquare(observed_freq, expected_freq) 31 | except Exception: 32 | p = 0.0 33 | chi2 = chi_worst_case 34 | 35 | skewness = stats.skew(value) 36 | 37 | return pd.DataFrame(pd.DataFrame({"Hash Function": [key], "Skewness": [skewness], "Chi-Test": [chi2], "Uniform?": [p > 0.05]})) 38 | 39 | def handle_distribution_analysis(args): 40 | try: 41 | arrays = {} 42 | for file in args.distribution: 43 | abs_path = os.path.abspath(file) 44 | spec = importlib.util.spec_from_file_location(os.path.basename(file.replace(".py","")),abs_path) 45 | module = importlib.util.module_from_spec(spec) 46 | spec.loader.exec_module(module) 47 | arrays[file] = module 48 | except ImportError: 49 | print(args.distribution, "is an invalid import! 
Please make sure that the module exists and is in the same directory as this script.") 50 | print("If it does not exist, you can obtain it by running keyuser with --test-distribution or bench-runner with --histogram ", args.distribution) 51 | exit(1) 52 | 53 | result_array = None 54 | for module_name, current_module in arrays.items(): 55 | regex_name = os.path.basename(module_name).replace(".py", "") 56 | 57 | if args.hash_functions is None: 58 | args.hash_functions = [] 59 | for key, _ in current_module.distributions.items(): 60 | args.hash_functions.append(key) 61 | 62 | result = None 63 | if args.plot_graph: 64 | file_destination = args.output_destination + regex_name + "_histogram.svg" 65 | _, ax = plt.subplots(figsize=(10, 5)) 66 | 67 | for key, value in current_module.distributions.items(): 68 | key = key.replace("array_", "") 69 | 70 | if result is None: 71 | result = distribution_test(key, value) 72 | else: 73 | result = pd.concat([result, distribution_test(key, value)], ignore_index=True) 74 | 75 | print(result) 76 | 77 | # Normalize "Chi-Test" column using the "STDHashSrc" as the reference 78 | result["Chi-Test"] = result["Chi-Test"] / result[result["Hash Function"] == "STDHashSrc"]["Chi-Test"].values[0] 79 | 80 | # Remove all entries from the dataframe that contain Simd Keyword 81 | result = result[~result['Hash Function'].str.contains("Simd")] 82 | result = result[~result['Hash Function'].str.contains("Murmur")] 83 | 84 | # Replace all instances of AbseilHash with Abseil 85 | result['Hash Function'] = [re.sub(r'AbseilHash.*', 'Abseil', x) for x in result['Hash Function']] 86 | result['Hash Function'] = [re.sub(r'FNVHash.*', 'FNV', x) for x in result['Hash Function']] 87 | result['Hash Function'] = [re.sub(r'CityHash.*', 'City', x) for x in result['Hash Function']] 88 | 89 | # Concatenate all entries on the dataframe HashFunction that start with the same Prefix 90 | result['Hash Function'] = [re.sub(r'Pext.*', 'Pext', x) for x in result['Hash Function']] 
91 | result['Hash Function'] = [re.sub(r'OffXor.*', 'OffXor', x) for x in result['Hash Function']] 92 | result['Hash Function'] = [re.sub(r'Naive.*', 'Naive', x) for x in result['Hash Function']] 93 | result['Hash Function'] = [re.sub(r'Gperf.*', 'Gperf', x) for x in result['Hash Function']] 94 | result['Hash Function'] = [re.sub(r'Gpt.*', 'Gpt', x) for x in result['Hash Function']] 95 | result['Hash Function'] = [re.sub(r'STDHashSrc.*', 'STL', x) for x in result['Hash Function']] 96 | result['Hash Function'] = [re.sub(r'Aes.*', 'Aes', x) for x in result['Hash Function']] 97 | 98 | if result_array is None: 99 | result_array = result 100 | else: 101 | result_array = pd.concat([result_array, result], ignore_index=True) 102 | 103 | # Remove all entries from the dataframe that contain Simd Keyword 104 | result_array = result_array.groupby("Hash Function")["Chi-Test"].sum().reset_index() 105 | result_array["Chi-Test"] = result_array["Chi-Test"] / len(arrays) 106 | 107 | print("Below DataFrame from distribution file: ", args.distribution) 108 | print(result_array) 109 | output_path = args.output_destination + regex_name + "_chitest.csv" 110 | print("See all results in: ", output_path) 111 | result_array.to_csv(output_path, index=False) 112 | 113 | # if args.plot_graph: 114 | # ax.legend(loc='upper left', bbox_to_anchor=(1, 1)) 115 | # plt.tight_layout() 116 | # plt.savefig(file_destination) 117 | # print("Histograms saved in: ", file_destination) 118 | 119 | ######################## 120 | # Performance Analysis # 121 | ######################## 122 | 123 | def mannwhitneyu_from_dataframe(args, regex_name, df, column_to_group, column_to_compare): 124 | grouped = df.groupby(column_to_group) 125 | 126 | # Extract the groups from the DataFrame 127 | groups = {} 128 | groups = [group for group in grouped.groups] 129 | 130 | # Initialize an empty DataFrame 131 | result = pd.DataFrame() 132 | 133 | for hashFunc1 in groups: 134 | for hashFunc2 in groups: 135 | if hashFunc1 == 
hashFunc2: 136 | continue 137 | 138 | # Calculate the Mann-Whitney U test between the first two groups 139 | _, p = stats.mannwhitneyu(grouped.get_group(hashFunc1)[column_to_compare], grouped.get_group(hashFunc2)[column_to_compare]) 140 | 141 | # interpret 142 | alpha = 0.05 143 | 144 | # Append a new row to the DataFrame 145 | result = pd.concat([result, pd.DataFrame({"Hash Function 1": [hashFunc1], "Hash Function 2": [hashFunc2], "p-value": [p], "Same Distribution?": [p > alpha]})], ignore_index=True) 146 | 147 | 148 | 149 | print("Below DataFrame from Regex: ", regex_name) 150 | print(result) 151 | column_to_compare = column_to_compare.replace(" ", "_") 152 | output_path = args.output_destination + regex_name + column_to_compare + "_mannwhitneyu.csv" 153 | print("See all results in: ", output_path) 154 | result.to_csv(output_path, index=False) 155 | 156 | 157 | def containers_boxplot(args, df): 158 | 159 | # grouped = df.groupby(['Execution Mode', 'Num Operations', 'Num Keys', 'Insertions (%)', 'Searches (%)', 'Eliminatons(%)', 'Hash Container']) 160 | # # Extract the groups from the DataFrame 161 | # groups = {} 162 | # groups = [group for group in grouped.groups] 163 | 164 | # all_data = {} 165 | 166 | # for group in groups: 167 | 168 | # temp = grouped.get_group(group)[['Execution Time (s)', 'Collision Count']] 169 | 170 | # hash_func_name = group[-1] 171 | 172 | # if hash_func_name not in all_data: 173 | # all_data[hash_func_name] = [(temp['Execution Time (s)'].mean(), temp['Collision Count'].mean())] 174 | # else: 175 | # all_data[hash_func_name].append((temp['Execution Time (s)'].mean(), temp['Collision Count'].mean())) 176 | 177 | # # Geometric mean of all_data 178 | # result = pd.DataFrame() 179 | 180 | # for data in all_data: 181 | # samples_geotime = 1.0 182 | # samples_collision = 1.0 183 | # for sample in all_data[data]: 184 | # samples_geotime *= sample[0] 185 | # if sample[1] != 0: 186 | # samples_collision *= sample[1] 187 | # samples_geotime = 
samples_geotime ** (1/len(all_data[data])) 188 | # samples_collision = samples_collision ** (1/len(all_data[data])) 189 | # result = pd.concat([result, pd.DataFrame({"Func Name": [data], "GeoTime": [samples_geotime], "GeoCollision": [samples_collision]})], ignore_index=True) 190 | 191 | # print("Below DataFrame from Regex: ", regex_name) 192 | # print(result) 193 | # output_path = args.output_destination + regex_name + "_geomean.csv" 194 | # print("See all results in: ", output_path) 195 | # result.to_csv(output_path, index=False) 196 | 197 | plt.rcParams['font.size'] = 14 198 | df.boxplot(column='Execution Time (s)', by='Hash Container', rot=45, showmeans=True, showfliers=False) 199 | plt.ylabel('Execution Time (s)') 200 | plt.xlabel('') 201 | plt.xticks([1, 2, 3, 4], ["U_Map", "UM_Map", "UM_Set", "U_Set"]) 202 | plt.title('') 203 | plt.suptitle('') 204 | plt.tight_layout() 205 | plt.savefig(args.output_destination+'containers.pdf') 206 | 207 | print("Container Boxplots saved in: ", args.output_destination+'containers.pdf') 208 | 209 | 210 | def performance_from_dataframe(args, df, regex_name): 211 | grouped = df.groupby(['Execution Mode', 'Num Operations', 'Num Keys', 'Insertions (%)', 'Searches (%)', 'Eliminatons(%)', 'Hash Function']) 212 | 213 | # Extract the groups from the DataFrame 214 | groups = {} 215 | groups = [group for group in grouped.groups] 216 | 217 | all_data = {} 218 | 219 | for group in groups: 220 | temp = grouped.get_group(group)[['Execution Time (s)', 'Collision Count']] 221 | 222 | hash_func_name = group[-1] 223 | 224 | if hash_func_name not in all_data: 225 | all_data[hash_func_name] = [(temp['Execution Time (s)'].mean(), temp['Collision Count'].mean())] 226 | else: 227 | all_data[hash_func_name].append((temp['Execution Time (s)'].mean(), temp['Collision Count'].mean())) 228 | 229 | # Geometric mean of all_data 230 | result = pd.DataFrame() 231 | 232 | for data in all_data: 233 | samples_geotime = 1.0 234 | samples_collision = 1.0 235 | 
for sample in all_data[data]: 236 | samples_geotime *= sample[0] 237 | if sample[1] != 0: 238 | samples_collision *= sample[1] 239 | samples_geotime = samples_geotime ** (1/len(all_data[data])) 240 | samples_collision = samples_collision ** (1/len(all_data[data])) 241 | result = pd.concat([result, pd.DataFrame({"Func Name": [data], "GeoTime": [samples_geotime], "GeoCollision": [samples_collision]})], ignore_index=True) 242 | 243 | print("Below DataFrame from Regex: ", regex_name) 244 | print(result) 245 | output_path = args.output_destination + regex_name + "_geomean.csv" 246 | print("See all results in: ", output_path) 247 | result.to_csv(output_path, index=False) 248 | 249 | def handle_performance_analysis(args): 250 | 251 | # Load CSV files into pandas dataframe 252 | csv_files = args.performance 253 | dataframes = [pd.read_csv(file) for file in csv_files] 254 | 255 | regex_name = "global" 256 | 257 | # Concatenate dataframes 258 | df = pd.concat(dataframes, ignore_index=True) 259 | 260 | # Remove all entries from the dataframe that contain Simd Keyword 261 | df = df[~df['Hash Function'].str.contains("Simd")] 262 | 263 | # Replace all instances of AbseilHash with Abseil 264 | df['Hash Function'] = [re.sub(r'AbseilHash.*', 'Abseil', x) for x in df['Hash Function']] 265 | df['Hash Function'] = [re.sub(r'FNVHash.*', 'FNV', x) for x in df['Hash Function']] 266 | df['Hash Function'] = [re.sub(r'CityHash.*', 'City', x) for x in df['Hash Function']] 267 | 268 | # Concatenate all entries on the dataframe HashFunction that start with the same Prefix 269 | df['Hash Function'] = [re.sub(r'Pext.*', 'Pext', x) for x in df['Hash Function']] 270 | df['Hash Function'] = [re.sub(r'OffXor.*', 'OffXor', x) for x in df['Hash Function']] 271 | df['Hash Function'] = [re.sub(r'Naive.*', 'Naive', x) for x in df['Hash Function']] 272 | df['Hash Function'] = [re.sub(r'Gperf.*', 'Gperf', x) for x in df['Hash Function']] 273 | df['Hash Function'] = [re.sub(r'Gpt.*', 'Gpt', x) for x in 
df['Hash Function']] 274 | df['Hash Function'] = [re.sub(r'STDHashSrc.*', 'STL', x) for x in df['Hash Function']] 275 | df['Hash Function'] = [re.sub(r'Aes.*', 'Aes', x) for x in df['Hash Function']] 276 | 277 | if args.hash_performance: 278 | # Iterate Hash Functions in the dataframe 279 | grouped = df.groupby(['Hash Function']) 280 | for hashFunc in grouped.groups: 281 | mean = grouped.get_group(hashFunc)['Elapsed Time (seconds)'].mean() 282 | mean = mean * 1000 283 | print(f"{hashFunc},{mean:.4f}") 284 | return 285 | 286 | # Calculate the Mann-Whitney U test 287 | mannwhitneyu_from_dataframe(args, regex_name, df, 'Hash Function', 'Execution Time (s)') 288 | mannwhitneyu_from_dataframe(args, regex_name, df, 'Hash Function', 'Collision Count') 289 | 290 | if args.rq6: 291 | containers_boxplot(args, df) 292 | 293 | performance_from_dataframe(args, df, regex_name) 294 | 295 | plt.rcParams['font.size'] = 14 296 | df.boxplot(column='Collision Count', by='Hash Function', rot=45, showmeans=True, showfliers=False) 297 | plt.ylabel('Collision Count') 298 | plt.xlabel('') 299 | plt.title('') 300 | plt.suptitle('') 301 | plt.tight_layout() 302 | plt.savefig(args.output_destination+'global_collision_count.pdf') 303 | print("Collision Count Boxplots saved in: ", args.output_destination+'global_collision_count.pdf') 304 | 305 | df = df[df['Hash Function'] != 'Gperf'] 306 | df.boxplot(column='Execution Time (s)', by='Hash Function', rot=45, showmeans=True, showfliers=False) 307 | plt.ylabel('Execution Time (s)') 308 | plt.xlabel('') 309 | plt.title('') 310 | plt.suptitle('') 311 | plt.tight_layout() 312 | plt.savefig(args.output_destination+'global_performance.pdf') 313 | print("Execution Time Boxplots saved in: ", args.output_destination+'global_performance.pdf') 314 | 315 | def main(): 316 | parser = argparse.ArgumentParser(description="Keyuser Interpreter") 317 | parser.add_argument("-d", "--distribution", nargs='*', type=str, default="", help="Name of the distribution files 
to interpret. Exclusive with -p option.") 318 | parser.add_argument("-p", "--performance", nargs='*', type=str, default="", help="Name of the csv performance files to interpret. Exclusive with -d option.") 319 | parser.add_argument("-hp", "--hash-performance", action='store_true', help="Name of the csv performance files to interpret.") 320 | parser.add_argument("-rq6", action='store_true', help="Group performance by data structure.") 321 | parser.add_argument("-pg", "--plot-graph", action='store_true', help="Option to plot the results in graphs.") 322 | parser.add_argument("-od", "--output-destination", type=str, default="results/", help="Output path to output graphs. Default is current file.") 323 | parser.add_argument("-fp", "--full-print", action='store_true', help="Print the entire dataframe.") 324 | parser.add_argument("-hf", "--hash-functions", nargs='*', type=str, help="Name of the hash functions to analyze.") 325 | 326 | args = parser.parse_args() 327 | 328 | if args.full_print: 329 | pd.set_option('display.max_rows', None) 330 | pd.set_option('display.max_columns', None) 331 | pd.set_option('display.width', None) 332 | pd.set_option('display.max_colwidth', None) 333 | 334 | if args.performance: 335 | handle_performance_analysis(args) 336 | elif args.distribution: 337 | handle_distribution_analysis(args) 338 | 339 | main() 340 | -------------------------------------------------------------------------------- /src/keyuser/src/benchmarks.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file benchmarks.hpp 3 | * @brief This file contains functions to execute and manage hash function benchmarks. 4 | */ 5 | 6 | #ifndef BENCHMARKS_HPP 7 | #define BENCHMARKS_HPP 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | /** 15 | * @brief Base class for benchmarking. 16 | */ 17 | class Benchmark{ 18 | 19 | const std::string containerName; ///< The name of the container used in the benchmark. 
20 | const std::string hashName; ///< The name of the hash function used in the benchmark. 21 | 22 | protected: 23 | 24 | /** 25 | * @brief Calculates the number of collisions in the buckets of a given unordered container. 26 | * For every bucket holding more than one element, (bucket size - 1) is added to the total. 27 | * @tparam UnorderedContainer The type of the unordered container. 28 | * @param container The unordered container to calculate collisions for. 29 | * @return The number of collisions (0 when no bucket holds more than one element). 30 | */ 31 | template 32 | int internalcalculateCollisionCountBuckets(const UnorderedContainer& container) { 33 | // STD Containers may have multiple keys inside the same bucket, even though they have different hashes :( 34 | int colcount = 0; 35 | int empty = 0; 36 | for (size_t bucket = 0; bucket < container.bucket_count(); ++bucket) { 37 | if (container.bucket_size(bucket) > 1) { 38 | colcount += container.bucket_size(bucket) - 1; 39 | } else { 40 | ++empty; /* NOTE(review): also incremented for single-element buckets; `empty` is never read in this function. */ 41 | } 42 | } 43 | return colcount; 44 | } 45 | 46 | public: 47 | /** 48 | * @brief Construct a new Benchmark object. 49 | * 50 | * @param _containerName The name of the container used in the benchmark. 51 | * @param _hashName The name of the hash function used in the benchmark. 52 | */ 53 | Benchmark(const std::string& _containerName, const std::string& _hashName) : 54 | containerName(_containerName), 55 | hashName(_hashName) 56 | {} 57 | 58 | /** 59 | * @brief Get the name of the container used in the benchmark. 60 | * 61 | * @return The name of the container. 62 | */ 63 | std::string getContainerName(){ return containerName; } 64 | 65 | /** 66 | * @brief Get the name of the hash function used in the benchmark. 67 | * 68 | * @return The name of the hash function. 69 | */ 70 | std::string getHashName(){ return hashName; } 71 | 72 | /** 73 | * @brief Virtual destructor. 74 | */ 75 | virtual ~Benchmark() {} 76 | 77 | /** 78 | * @brief Insert a key into the container. Must be implemented by derived classes. 79 | * 80 | * @param key The key to insert.
81 | */ 82 | virtual void insert(const std::string& key) = 0; 83 | 84 | /** 85 | * @brief Search for a key in the container. Must be implemented by derived classes. 86 | * 87 | * @param key The key to search for. 88 | * @return true If the key is found. 89 | * @return false Otherwise. 90 | */ 91 | virtual bool search(const std::string& key) = 0; 92 | 93 | /** 94 | * @brief Remove a key from the container. Must be implemented by derived classes. 95 | * 96 | * @param key The key to remove. 97 | */ 98 | virtual void elimination(const std::string& key) = 0; 99 | 100 | /** 101 | * @brief Calculate the collision count of the container's buckets. Must be implemented by derived classes. 102 | * 103 | * @return The collision count; the derived classes in this header return the result of internalcalculateCollisionCountBuckets() for their container. 104 | */ 105 | virtual int calculateCollisionCountBuckets(void) = 0; 106 | 107 | /** 108 | * @brief Clear the benchmarked container between executions. Must be implemented by derived classes. 109 | */ 110 | virtual void clearContainer(void) = 0; 111 | 112 | /** 113 | * @brief Get the hash function used by the container. Must be implemented by derived classes. 114 | * 115 | * @return The hash function. 116 | */ 117 | virtual std::function getHashFunction(void) = 0; 118 | }; 119 | 120 | /** 121 | * @brief A benchmarking class for unordered map with a custom hash function for std::string. 122 | * 123 | * @tparam HashFuncT The type of the hash function. 124 | */ 125 | template 126 | class UnorderedMapBench : public Benchmark{ 127 | std::unordered_map map; ///< The unordered map used for benchmarking. 128 | HashFuncT hashFunctor; ///< The hash function object. NOTE(review): not referenced by any member function of this class. 129 | 130 | public: 131 | /** 132 | * @brief Construct a new Unordered Map Bench object. 133 | * 134 | * @param _name The name of the container; forwarded to the Benchmark constructor as the container name. 135 | * @param _hashName The name of the hash function.
136 | */ 137 | UnorderedMapBench(std::string _name, std::string _hashName) : 138 | Benchmark(_name, _hashName) 139 | {} 140 | 141 | /** 142 | * @brief Insert a key into the unordered map, mapping it to 0 (the value of an already present key is overwritten with 0). 143 | * 144 | * @param key The key to insert. 145 | */ 146 | void insert(const std::string& key) override { 147 | map[key] = 0; 148 | } 149 | 150 | /** 151 | * @brief Search for a key in the unordered map. 152 | * 153 | * @param key The key to search for. 154 | * @return true If the key is found. 155 | * @return false Otherwise. 156 | */ 157 | bool search(const std::string& key) override { 158 | return map.find(key) != map.end(); 159 | } 160 | 161 | /** 162 | * @brief Remove a key from the unordered map. 163 | * 164 | * @param key The key to remove. 165 | */ 166 | void elimination(const std::string& key) override { 167 | map.erase(key); 168 | } 169 | 170 | /** 171 | * @brief Calculate the collision count of the unordered map's buckets. 172 | * 173 | * @return int The value returned by internalcalculateCollisionCountBuckets() for the map. 174 | */ 175 | int calculateCollisionCountBuckets(void) override { 176 | return internalcalculateCollisionCountBuckets(map); 177 | } 178 | 179 | /** 180 | * @brief Remove all elements from the unordered map between executions. 181 | */ 182 | void clearContainer(void) override{ 183 | map.clear(); 184 | } 185 | 186 | /** 187 | * @brief Get the hash function used by the unordered map. 188 | * 189 | * @return std::function The hash function. 190 | */ 191 | std::function getHashFunction(void) override { 192 | return map.hash_function(); 193 | } 194 | }; 195 | 196 | /** 197 | * @brief A benchmarking class for unordered multimap with a custom hash function for std::string. 198 | * 199 | * @tparam HashFuncT The type of the hash function. 200 | */ 201 | template 202 | class UnorderedMultiMapBench : public Benchmark{ 203 | std::unordered_multimap mmap; ///< The unordered multimap used for benchmarking. 204 | 205 | public: 206 | /** 207 | * @brief Construct a new Unordered Multi Map Bench object.
208 | * 209 | * @param _name The name of the container; forwarded to the Benchmark constructor as the container name. 210 | * @param _hashName The name of the hash function. 211 | */ 212 | UnorderedMultiMapBench(std::string _name, std::string _hashName) : 213 | Benchmark(_name, _hashName) 214 | {} 215 | 216 | /** 217 | * @brief Insert a (key, 0) pair into the unordered multimap. 218 | * 219 | * @param key The key to insert. 220 | */ 221 | void insert(const std::string& key) override { 222 | mmap.insert(std::make_pair(key, 0)); 223 | } 224 | 225 | /** 226 | * @brief Search for a key in the unordered multimap. 227 | * 228 | * @param key The key to search for. 229 | * @return true If the key is found. 230 | * @return false Otherwise. 231 | */ 232 | bool search(const std::string& key) override { 233 | return mmap.find(key) != mmap.end(); 234 | } 235 | 236 | /** 237 | * @brief Remove a key from the unordered multimap. 238 | * 239 | * @param key The key to remove. 240 | */ 241 | void elimination(const std::string& key) override { 242 | mmap.erase(key); 243 | } 244 | 245 | /** 246 | * @brief Calculate the collision count of the unordered multimap's buckets. 247 | * 248 | * @return int The value returned by internalcalculateCollisionCountBuckets() for the multimap. 249 | */ 250 | int calculateCollisionCountBuckets(void) override { 251 | return internalcalculateCollisionCountBuckets(mmap); 252 | } 253 | 254 | /** 255 | * @brief Remove all elements from the unordered multimap between executions. 256 | */ 257 | void clearContainer(void) override{ 258 | mmap.clear(); 259 | } 260 | 261 | /** 262 | * @brief Get the hash function used by the unordered multimap. 263 | * 264 | * @return std::function The hash function. 265 | */ 266 | std::function getHashFunction(void) override { 267 | return mmap.hash_function(); 268 | } 269 | }; 270 | 271 | /** 272 | * @brief A benchmarking class for unordered set with a custom hash function for std::string. 273 | * 274 | * @tparam HashFuncT The type of the hash function.
275 | */ 276 | template 277 | class UnorderedSetBench : public Benchmark{ 278 | std::unordered_set set; ///< The unordered set used for benchmarking. 279 | 280 | public: 281 | /** 282 | * @brief Construct a new Unordered Set Bench object. 283 | * 284 | * @param _name The name of the container; forwarded to the Benchmark constructor as the container name. 285 | * @param _hashName The name of the hash function. 286 | */ 287 | UnorderedSetBench(std::string _name, std::string _hashName) : 288 | Benchmark(_name, _hashName) 289 | {} 290 | 291 | /** 292 | * @brief Insert a key into the unordered set. 293 | * 294 | * @param key The key to insert. 295 | */ 296 | void insert(const std::string& key) override { 297 | set.insert(key); 298 | } 299 | 300 | /** 301 | * @brief Search for a key in the unordered set. 302 | * 303 | * @param key The key to search for. 304 | * @return true If the key is found. 305 | * @return false Otherwise. 306 | */ 307 | bool search(const std::string& key) override { 308 | return set.find(key) != set.end(); 309 | } 310 | 311 | /** 312 | * @brief Remove a key from the unordered set. 313 | * 314 | * @param key The key to remove. 315 | */ 316 | void elimination(const std::string& key) override { 317 | set.erase(key); 318 | } 319 | 320 | /** 321 | * @brief Calculate the collision count of the unordered set's buckets. 322 | * 323 | * @return int The value returned by internalcalculateCollisionCountBuckets() for the set. 324 | */ 325 | int calculateCollisionCountBuckets(void) override { 326 | return internalcalculateCollisionCountBuckets(set); 327 | } 328 | 329 | /** 330 | * @brief Remove all elements from the unordered set between executions. 331 | */ 332 | void clearContainer(void) override{ 333 | set.clear(); 334 | } 335 | 336 | /** 337 | * @brief Get the hash function used by the unordered set. 338 | * 339 | * @return std::function The hash function.
340 | */ 341 | std::function getHashFunction(void) override { 342 | return set.hash_function(); 343 | } 344 | }; 345 | 346 | /** 347 | * @brief A benchmarking class for unordered multiset with a custom hash function for std::string. 348 | * 349 | * @tparam HashFuncT The type of the hash function. 350 | */ 351 | template 352 | class UnorderedMultisetBench : public Benchmark{ 353 | std::unordered_multiset mset; ///< The unordered multiset used for benchmarking. 354 | 355 | public: 356 | /** 357 | * @brief Construct a new Unordered Multiset Bench object. 358 | * 359 | * @param _name The name of the container; forwarded to the Benchmark constructor as the container name. 360 | * @param _hashName The name of the hash function. 361 | */ 362 | UnorderedMultisetBench(std::string _name, std::string _hashName) : 363 | Benchmark(_name, _hashName) 364 | {} 365 | 366 | /** 367 | * @brief Insert a key into the unordered multiset. 368 | * 369 | * @param key The key to insert. 370 | */ 371 | void insert(const std::string& key) override { 372 | mset.insert(key); 373 | } 374 | 375 | /** 376 | * @brief Search for a key in the unordered multiset. 377 | * 378 | * @param key The key to search for. 379 | * @return true If the key is found. 380 | * @return false Otherwise. 381 | */ 382 | bool search(const std::string& key) override { 383 | return mset.find(key) != mset.end(); 384 | } 385 | 386 | /** 387 | * @brief Remove a key from the unordered multiset. 388 | * 389 | * @param key The key to remove. 390 | */ 391 | void elimination(const std::string& key) override { 392 | mset.erase(key); 393 | } 394 | 395 | /** 396 | * @brief Calculate the collision count of the unordered multiset's buckets. 397 | * 398 | * @return int The value returned by internalcalculateCollisionCountBuckets() for the multiset. 399 | */ 400 | int calculateCollisionCountBuckets(void) override { 401 | return internalcalculateCollisionCountBuckets(mset); 402 | } 403 | 404 | /** 405 | * @brief Clear the container benchmark between executions.
406 | */ 407 | void clearContainer(void) override{ 408 | mset.clear(); 409 | } 410 | 411 | /** 412 | * @brief Get the hash function used by the unordered multiset. 413 | * 414 | * @return std::function The hash function. 415 | */ 416 | std::function getHashFunction(void) override { 417 | return mset.hash_function(); 418 | } 419 | }; 420 | 421 | /** 422 | * @brief Struct to hold parameters for benchmarking. 423 | */ 424 | struct BenchmarkParameters{ 425 | std::vector hashesToRun; ///< Vector of hash functions to run. 426 | int insert = -1; ///< Number of insert operations. 427 | int search = -1; ///< Number of search operations. 428 | int elimination = -1; ///< Number of elimination operations. 429 | int numOperations = -1; ///< Total number of operations. 430 | int seed = 223554; ///< Seed for random number generation. Chosen by a fair dice roll. 431 | int repetitions = 1; ///< Number of repetitions for each benchmark. 432 | bool verbose = false; ///< Verbose output flag. 433 | bool testDistribution = false; ///< Flag to test distribution. 434 | bool hashPerformance = false; ///< Flag to test hash performance. 435 | std::string distribution = "normal"; ///< Distribution to use for testing. 436 | }; 437 | 438 | /** 439 | * @brief Execute a benchmark with interweaved operations with 50% insertions warm-up. 440 | * 441 | * @param bench The benchmark to execute. 442 | * @param keys The keys to use in the benchmark. 443 | * @param args The parameters for the benchmark. 444 | */ 445 | void executeInterweaved(Benchmark* bench, 446 | const std::vector& keys, 447 | const BenchmarkParameters& args); 448 | 449 | /** 450 | * @brief Execute a benchmark with batched operations. 451 | * 452 | * @param bench The benchmark to execute. 453 | * @param keys The keys to use in the benchmark. 454 | * @param args The parameters for the benchmark. 
455 | */ 456 | void executeBatched(Benchmark* bench, 457 | const std::vector& keys, 458 | const BenchmarkParameters& args); 459 | 460 | /** 461 | * @brief Executes a set of benchmarks and prints to standard output the performance and collision results in CSV format. 462 | * 463 | * @param benchmarks The benchmarks to execute. 464 | * @param keys The keys to use in the benchmarks. 465 | * @param args The parameters for the benchmarks. 466 | */ 467 | void benchmarkExecutor(const std::vector& benchmarks, 468 | const std::vector& keys, 469 | const BenchmarkParameters& args); 470 | 471 | /** 472 | * @brief Tests the distribution of benchmarks. 473 | * 474 | * This function takes a vector of benchmarks and a vector of keys. It tests the distribution of the benchmarks 475 | * according to the keys provided. 476 | * 477 | * Prints to standard output a python numpy array containing all sorted hashed values. 478 | * 479 | * @param benchmarks A vector of pointers to Benchmark objects to be tested. 480 | * @param keys A vector of keys according to which the benchmarks are to be distributed. 481 | */ 482 | void testDistribution(const std::vector& benchmarks, 483 | const std::vector& keys); 484 | 485 | /** 486 | * @brief Tests the performance of a hash function. 487 | * 488 | * This function takes a vector of benchmarks, a vector of keys and benchmark parameters as input, 489 | * and prints to standard output the elapsed time in seconds to hash all provided keys. 490 | * @param benchmarks A vector of pointers to Benchmark objects to be tested. 491 | * @param keys A constant reference to a vector of strings that represent the keys to be hashed. 492 | * @param args A constant reference to the BenchmarkParameters object that contains the parameters for the benchmark. 493 | */ 494 | void testHashPerformance(const std::vector& benchmarks, 495 | const std::vector& keys, 496 | const BenchmarkParameters& args); 497 | 498 | /** 499 | * @brief Frees the memory allocated for the benchmarks.
500 | * 501 | * This function takes a vector of benchmarks and deallocates the memory used by these benchmarks. 502 | * 503 | * @param benchmarks A vector of pointers to Benchmark objects to be freed. 504 | */ 505 | void freeBenchmarks(std::vector& benchmarks); 506 | 507 | #endif 508 | --------------------------------------------------------------------------------