├── src
    ├── keygen
    │   ├── .gitignore
    │   ├── Cargo.toml
    │   ├── Cargo.lock
    │   └── src
    │   │   └── main.rs
    ├── sepe-runner
    │   ├── .gitignore
    │   ├── Cargo.toml
    │   ├── Cargo.lock
    │   └── src
    │   │   └── main.rs
    ├── keysynth
    │   ├── .gitignore
    │   └── Makefile
    ├── keybuilder
    │   ├── .gitignore
    │   ├── Makefile
    │   └── src
    │   │   └── main.c
    └── keyuser
    │   ├── .gitignore
    │   ├── src
    │       ├── google-hashes
    │       │   ├── citycrc.hpp
    │       │   ├── city.hpp
    │       │   └── farm.hpp
    │       ├── gperf-hashes
    │       │   ├── gperf-hashes.h
    │       │   ├── url_fixed_pattern.c
    │       │   ├── cpf.c
    │       │   ├── mac.c
    │       │   ├── ipv6.c
    │       │   ├── url_complex.c
    │       │   ├── ipv4.c
    │       │   ├── ssn.c
    │       │   └── ints.c
    │       ├── registry.hpp
    │       ├── main.cpp
    │       ├── customHashes.hpp
    │       ├── benchmarks.cpp
    │       └── benchmarks.hpp
    │   └── Makefile
├── scripts
    ├── .gitignore
    ├── align_csv.sh
    ├── make_hash_from_regex.sh
    ├── install_abseil.sh
    ├── distribution_benchmark.sh
    ├── benchmark.sh
    ├── keyuser_interpreter.py
    └── global_keyuser_interpreter.py
├── assets
    └── images
    │   └── SepeBanner.png
├── .gitignore
├── results
    └── gpt-prompts
    │   ├── ssn.txt
    │   ├── url_fixed_pattern.txt
    │   ├── cpf.txt
    │   ├── ipv4.txt
    │   ├── url_complex.txt
    │   ├── ints.txt
    │   ├── ipv6.txt
    │   └── mac.txt
├── Regexes.toml
├── Makefile
└── README.md


/src/keygen/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 | 


--------------------------------------------------------------------------------
/src/sepe-runner/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 | 


--------------------------------------------------------------------------------
/src/keysynth/.gitignore:
--------------------------------------------------------------------------------
1 | keysynth
2 | keysynth-debug
3 | 


--------------------------------------------------------------------------------
/src/keybuilder/.gitignore:
--------------------------------------------------------------------------------
1 | keybuilder
2 | keybuilder-debug
3 | 


--------------------------------------------------------------------------------
/src/keyuser/.gitignore:
--------------------------------------------------------------------------------
1 | keyuser
2 | keyuser-debug
3 | obj/
4 | .vscode
5 | 


--------------------------------------------------------------------------------
/scripts/.gitignore:
--------------------------------------------------------------------------------
1 | array*.py
2 | *.png
3 | *.svg
4 | __pycache__/
5 | *histogram*
6 | *.csv
7 | 


--------------------------------------------------------------------------------
/assets/images/SepeBanner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lac-dcc/sepe/HEAD/assets/images/SepeBanner.png


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .vscode
 2 | *.csv
 3 | bin/
 4 | output/
 5 | results/
 6 | deps/
 7 | __pycache__/
 8 | *.png
 9 | *.svg
10 | *_distribution.py
11 | 


--------------------------------------------------------------------------------
/scripts/align_csv.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | #
 3 | # Pretty-print a CSV file as space-aligned columns.
 4 | # The header row is printed first; the remaining rows are sorted by field 8.
 5 | #
 6 | # usage: align_csv.sh <path/to/csv/file>
 7 | 
 8 | set -e
 9 | 
10 | if [ $# -lt 1 ]; then
11 | 	echo "usage: $0 <path/to/csv/file>" >&2
12 | 	# Stop here: without a file argument `column` would be handed an empty path.
13 | 	exit 1
14 | fi
15 | 
16 | ALIGNED="$(column -o' ' -t -s, "$1")"
17 | echo "$ALIGNED" | head -n 1
18 | echo "$ALIGNED" | tail -n +2 | sort -k 8
19 | 


--------------------------------------------------------------------------------
/src/sepe-runner/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "sepe-runner"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | authors = ["Leonardo G. Faé", "Renato B. Hoffmann"]
 6 | # Dependency tables use the hyphenated `default-features` key; the underscore spelling is deprecated by Cargo.
 7 | [dependencies]
 8 | clap = { version = "4.4", features = ["derive"] }
 9 | toml = { version = "0.8", default-features = false, features = ["parse"] }
10 | 


--------------------------------------------------------------------------------
/scripts/make_hash_from_regex.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | #
 3 | # Build keygen, keybuilder and keysynth, then pipe keys generated for <regex>
 4 | # through keybuilder and hand the result to keysynth.
 5 | #
 6 | # usage: make_hash_from_regex.sh <regex>
 7 | 
 8 | set -e
 9 | 
10 | if [ $# -lt 1 ]; then
11 | 	echo "usage: $0 <regex>" >&2
12 | 	# Stop here: continuing would build everything and run keygen on an empty regex.
13 | 	exit 1
14 | fi
15 | 
16 | # The make targets and ./bin paths below are relative to the repository root.
17 | if [ "$(basename "$(pwd)")" = "scripts" ]; then
18 | 	cd ..
19 | fi
20 | 
21 | make --silent -j"$(nproc)" bin/keygen bin/keybuilder bin/keysynth
22 | ./bin/keysynth "$(./bin/keygen "$1" | ./bin/keybuilder)"
23 | # ./bin/keygen "$1" | ./bin/keybuilder
24 | 


--------------------------------------------------------------------------------
/src/keygen/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "keygen"
 3 | version = "0.1.0"
 4 | edition = "2021"
 5 | authors = ["Leonardo G. Faé", "Renato B. Hoffmann"]
 6 | 
 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 8 | # Dependency tables use the hyphenated `default-features` key; the underscore spelling is deprecated by Cargo.
 9 | [dependencies]
10 | clap = { version = "4.4", features = ["derive"] }
11 | rand = { version = "0.8", default-features = false, features = ["std_rng"] }
12 | rand_distr = { version = "0.4" }
13 | 


--------------------------------------------------------------------------------
/src/keybuilder/Makefile:
--------------------------------------------------------------------------------
 1 | # Compilation variables: flags shared by both builds, then the release and debug additions.
 2 | COMMON_FLAGS  := -std=c18 -Wall -Wextra -pedantic -pipe
 3 | RELEASE_FLAGS := $(COMMON_FLAGS) -O2 -flto=auto -march=native
 4 | DEBUG_FLAGS   := $(COMMON_FLAGS) -Og -g3 -fsanitize=address,undefined
 5 | # Default target: the release binary.
 6 | all: keybuilder Makefile
 7 | # Debug binary: built with DEBUG_FLAGS (-Og, -g3, address + undefined sanitizers). Makefile is a prerequisite, so editing it triggers a rebuild.
 8 | keybuilder-debug: src/main.c Makefile
 9 | 	$(CC) $(CFLAGS) $(DEBUG_FLAGS) -o $@ $< -lm
10 | # Release binary: built with RELEASE_FLAGS; $< is src/main.c and -lm links the math library.
11 | keybuilder: src/main.c Makefile
12 | 	$(CC) $(CFLAGS) $(RELEASE_FLAGS) -o $@ $< -lm
13 | # Remove both binaries.
14 | clean:
15 | 	rm -vf keybuilder keybuilder-debug
16 | # all and clean do not name files.
17 | .PHONY: all clean
18 | 


--------------------------------------------------------------------------------
/src/keysynth/Makefile:
--------------------------------------------------------------------------------
 1 | TARGET=keysynth
 2 | 
 3 | # Compilation variables: flags shared by both builds, then the release and debug additions.
 4 | COMMON_FLAGS  := -std=c++20 -Wall -Wextra -pedantic
 5 | RELEASE_FLAGS := $(COMMON_FLAGS) -O2 -pipe -flto=auto -march=native
 6 | DEBUG_FLAGS   := $(COMMON_FLAGS) -Og -g3 -fsanitize=address,undefined -mbmi2 -msse2 -msse3
 7 | # Default target: the release binary named by TARGET.
 8 | all: $(TARGET) 
 9 | # Debug binary: sanitizers enabled; the instruction-set flags are spelled out because DEBUG_FLAGS has no -march=native.
10 | keysynth-debug: src/main.cpp
11 | 	$(CXX) $(CXXFLAGS) $(DEBUG_FLAGS) -o $@ $^
12 | # Release binary: optimized build with link-time optimization, tuned for the build machine.
13 | keysynth: src/main.cpp
14 | 	$(CXX) $(CXXFLAGS) $(RELEASE_FLAGS) -o $@ $^
15 | # Remove both binaries.
16 | clean:
17 | 	rm -vf keysynth keysynth-debug
18 | # all and clean do not name files.
19 | .PHONY: all clean
20 | 


--------------------------------------------------------------------------------
/scripts/install_abseil.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | set -e
 4 | 
 5 | ABSEIL_PATH="../deps/"
 6 | 
 7 | # Parse arguments
 8 | if [ $# -eq 1 ]; then
 9 |     ABSEIL_PATH=$1
10 | else
11 |     mkdir -p ../deps
12 | fi
13 | 
14 | # Check if directory exists
15 | #if [ ! -d "$ABSEIL_PATH" ]; then
16 | #    echo "Installation directory $ABSEIL_PATH DOES NOT exists!"
17 | #    exit 1
18 | #fi
19 | 
20 | # Convert into absolute path
21 | ABSEIL_PATH=$(cd "$ABSEIL_PATH"; pwd)
22 | ABSEIL_PATH="${ABSEIL_PATH}/abseil/"
23 | 
24 | echo "Installation path for Abseil: $ABSEIL_PATH"
25 | 
26 | CURRENT_DIR=$(pwd)
27 | 
28 | mkdir -p "${ABSEIL_PATH}"
29 | git clone https://github.com/abseil/abseil-cpp.git
30 | cd abseil-cpp/ && mkdir build && cd build
31 | cmake                                     \
32 | 	-DBUILD_TESTING=OFF                   \
33 | 	-DABSL_BUILD_TESTING=OFF              \
34 | 	-DABSL_USE_GOOGLETEST_HEAD=OFF        \
35 | 	-DCMAKE_CXX_STANDARD=17               \
36 | 	-DCMAKE_INSTALL_PREFIX="$ABSEIL_PATH" \
37 | 	..
38 | cmake --build . --target install -j"$(nproc)"
39 | cd "$CURRENT_DIR"
40 | rm -rf abseil-cpp
41 | 


--------------------------------------------------------------------------------
/results/gpt-prompts/ssn.txt:
--------------------------------------------------------------------------------
 1 | For a hash function, assume that keys are  SSN, always in the format 'nnn-nn-nnnn', where all characters are digits. The '-' character is a constant, so you can ignore it in your hash function. The fixed key size is 11 characters. 
 2 | 
 3 | The code is C++, and the keys are std::strings. Do not use std::hash. Assume you do not need to assert key format.
 4 | 
 5 | Produce an optimized hash function for this specific case with an unrolled for loop, and also consider that the constant character is always the same and in the same position.
 6 | 
 7 | ChatGPT
 8 | Certainly! Below is an example of an optimized hash function for this specific case using an unrolled for loop in C++:
 9 | 
10 | cpp
11 | Copy code
12 | #include <string>
13 | 
14 | size_t customHash(const std::string& key) {
15 |     constexpr size_t keySize = 11;
16 |     size_t hashValue = 0;
17 | 
18 |     // Unrolled for loop for better performance
19 |     for (size_t i = 0; i < keySize; i += 3) {
20 |         hashValue = 37 * hashValue + static_cast<size_t>(key[i] - '0');
21 |         hashValue = 37 * hashValue + static_cast<size_t>(key[i + 1] - '0');
22 |         hashValue = 37 * hashValue + static_cast<size_t>(key[i + 2] - '0');
23 |     }
24 | 
25 |     return hashValue;
26 | }
27 | This hash function takes advantage of the fixed key size and the fact that the '-' character is constant and in the same position. The loop is unrolled by three, as each group of three characters represents one part of the SSN. The multiplication by 37 is a common practice to mix the bits well, and subtracting '0' converts the character to its numeric value. This implementation should provide decent performance for this specific case.


--------------------------------------------------------------------------------
/Regexes.toml:
--------------------------------------------------------------------------------
 1 | # TOML basic strings treat the backslash as an escape character, so a regex \. must be written as \\. in this file
 2 | 
 3 | [URL_COMPLEX]
 4 | regex = "http:/google\\.github\\.io/[a-z]{10}/version[0-9]{3}/doxygen/html/[a-z0-9]{20}\\.html"
 5 | hashes = ["STDHashSrc", "AbseilHash", "CityHash", "FNVHash", "PextUrlComplex", "OffXorUrlComplex", "GptUrlComplex", "GperfUrlComplex", "NaiveUrlComplex" , "AesUrlComplex"]
 6 | 
 7 | [URL_FIXED_PATTERN]
 8 | regex = "http:/google\\.github\\.io/farmhash/doxygen/html/[a-z0-9]{20}\\.html"
 9 | hashes = ["STDHashSrc", "AbseilHash", "CityHash", "FNVHash", "PextUrl", "OffXorUrl", "NaiveUrl", "GptUrl", "GperfUrl", "AesUrl"]
10 | 
11 | [MAC]
12 | regex = "([0-9a-fA-F]{2}-){5}[0-9a-fA-F]{2}"
13 | hashes = ["STDHashSrc", "AbseilHash", "CityHash", "FNVHash", "PextMac", "OffXorMac", "NaiveMac", "GptMac", "GperfMac", "AesMac"]
14 | 
15 | [CPF]
16 | regex = "(([0-9]{3})\\.){2}[0-9]{3}-[0-9]{2}"
17 | hashes = ["STDHashSrc", "AbseilHash", "CityHash", "FNVHash", "PextCPF", "OffXorCPF", "NaiveCPF", "GptCPF", "GperfCPF", "AesCPF"]
18 | 
19 | [SSN]
20 | regex = "[0-9]{3}-[0-9]{2}-[0-9]{4}"
21 | hashes = ["STDHashSrc", "AbseilHash", "CityHash", "FNVHash", "PextSSN", "OffXorSSN", "NaiveSSN", "GptSSN", "GperfSSN", "AesSSN"]
22 | 
23 | [IPV4]
24 | regex = "(([0-9]{3})\\.){3}[0-9]{3}"
25 | hashes = ["STDHashSrc", "AbseilHash", "CityHash", "FNVHash", "PextIPV4", "OffXorIPV4", "NaiveIPV4", "GptIPV4", "GperfIPV4", "AesIPV4"]
26 | 
27 | [IPV6]
28 | regex = "([0-9a-fA-F]{4}:){7}[0-9a-fA-F]{4}"
29 | hashes = ["STDHashSrc", "AbseilHash", "CityHash", "FNVHash", "PextIPV6", "OffXorIPV6", "NaiveIPV6", "GptIPV6", "GperfIPV6", "AesIPV6"]
30 | 
31 | [INTS]
32 | regex = "[0-9]{100}"
33 | hashes = ["STDHashSrc", "AbseilHash", "CityHash", "FNVHash", "PextINTS", "OffXorINTS", "NaiveINTS", "GptINTS", "GperfINTS", "AesINTS"]
34 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Top-level build: each tool is built in its own directory under src/ and the
 2 | # resulting binary is copied into bin/.
 3 | all: bin bin/keygen bin/keybuilder bin/keysynth
 4 | 
 5 | benchmark: bin/keyuser bin/sepe-runner
 6 | 
 7 | keyuser: bin/keyuser
 8 | 
 9 | bin:
10 | 	@mkdir -p bin/
11 | 
12 | # Every bin/* target lists bin as an order-only prerequisite (after the |), so
13 | # the directory exists before cp runs, also under -j and when a single target
14 | # such as bin/keygen is requested directly. Sub-makes are started through
15 | # $(MAKE) so that command-line flags and the jobserver are passed on.
16 | 
17 | bin/keygen: $(shell find src/keygen/src/ -type f) | bin
18 | 	cd src/keygen && cargo build --release
19 | 	cp src/keygen/target/release/keygen $@
20 | 
21 | bin/keyuser: $(shell find src/keyuser/src/ -type f) src/keyuser/Makefile | bin
22 | 	$(MAKE) -C src/keyuser keyuser
23 | 	cp src/keyuser/keyuser $@
24 | 
25 | bin/keyuser-debug: $(shell find src/keyuser/src/ -type f) src/keyuser/Makefile | bin
26 | 	$(MAKE) -C src/keyuser keyuser-debug
27 | 	cp src/keyuser/keyuser-debug $@
28 | 
29 | bin/keybuilder: $(shell find src/keybuilder/src/ -type f) src/keybuilder/Makefile | bin
30 | 	$(MAKE) -C src/keybuilder keybuilder
31 | 	cp src/keybuilder/keybuilder $@
32 | 
33 | bin/keybuilder-debug: $(shell find src/keybuilder/src/ -type f) src/keybuilder/Makefile | bin
34 | 	$(MAKE) -C src/keybuilder keybuilder-debug
35 | 	cp src/keybuilder/keybuilder-debug $@
36 | 
37 | bin/keysynth:  $(shell find src/keysynth/src/ -type f) src/keysynth/Makefile | bin
38 | 	$(MAKE) -C src/keysynth keysynth
39 | 	cp src/keysynth/keysynth $@
40 | 
41 | # Copies the debug binary that was just built (not the release one).
42 | bin/keysynth-debug:  $(shell find src/keysynth/src/ -type f) src/keysynth/Makefile | bin
43 | 	$(MAKE) -C src/keysynth keysynth-debug
44 | 	cp src/keysynth/keysynth-debug $@
45 | 
46 | bin/sepe-runner: $(shell find src/sepe-runner/src/ -type f) | bin
47 | 	cd src/sepe-runner && cargo build --release
48 | 	cp src/sepe-runner/target/release/sepe-runner $@
49 | 
50 | clean:
51 | 	cd src/keygen &&      cargo clean
52 | 	cd src/sepe-runner && cargo clean
53 | 	$(MAKE) -C src/keyuser         clean
54 | 	$(MAKE) -C src/keysynth        clean
55 | 	$(MAKE) -C src/keybuilder      clean
56 | 	rm -rfv bin output *.csv
57 | 
58 | results-clear:
59 | 	$(MAKE) -C results/ clear
60 | 
61 | # None of these targets names a file that the build creates.
62 | .PHONY: all benchmark keyuser clean results-clear
63 | 


--------------------------------------------------------------------------------
/scripts/distribution_benchmark.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # Runs bin/sepe-runner in --histogram mode for every regex section of Regexes.toml and every operation mix, once per key distribution, and zips the CSV output.
 3 | set -e
 4 | # Work from the repository root even when started inside scripts/.
 5 | if [ "$(basename "$(pwd)")" = "scripts" ]; then
 6 | 	cd ..
 7 | fi
 8 | # CSV files are collected under output/.
 9 | if [ ! -d output ]; then
10 | 	mkdir output
11 | fi
12 | # Build the default targets, then the benchmark targets.
13 | make -j"$(nproc)"
14 | make -j"$(nproc)" benchmark
15 | # Benchmark parameters. NUM_OPS and NUM_KEYS are iterated as whitespace-separated lists; REGEXES holds the [SECTION] names found in Regexes.toml.
16 | NUM_OPS=10000
17 | NUM_KEYS="10000"
18 | REGEXES="$(sed  -n 's/^\[\(.*\)\]/\1/p' Regexes.toml)"
19 | REPETITIONS=1
20 | DISTRIBUTIONS="
21 | 0.7 0.2 0.1
22 | 0.6 0.2 0.2
23 | 0.4 0.3 0.3
24 | "
25 | DISTRIBUTIONS_COUNT="$(echo "$DISTRIBUTIONS" | wc -w)"
26 | # Each DISTRIBUTIONS row is one "insert search elimination" triple; the count above is in words, which is why the loop below steps by 3.
27 | HISTOGRAM_DISTRIBUTION="normal uniform incremental"
28 | 
29 | for HD in $HISTOGRAM_DISTRIBUTION; do
30 |     for REGEX in $REGEXES; do
31 |         COUNT=0
32 |         for NUM_OP in $NUM_OPS; do
33 |             for NUM_KEY in $NUM_KEYS; do
34 |                 for ARG in $(seq 1 3 "$DISTRIBUTIONS_COUNT"); do
35 |                     # Flatten DISTRIBUTIONS to one line and pick words ARG, ARG+1 and ARG+2 as one triple.
36 |                     INSERTION="$(  echo "$DISTRIBUTIONS" | tr '\n' ' ' | awk "{print \$$((ARG + 0))}")"
37 |                     SEARCH="$(     echo "$DISTRIBUTIONS" | tr '\n' ' ' | awk "{print \$$((ARG + 1))}")"
38 |                     ELIMINATION="$(echo "$DISTRIBUTIONS" | tr '\n' ' ' | awk "{print \$$((ARG + 2))}")"
39 | 
40 |                     ./bin/sepe-runner \
41 |                         --histogram \
42 |                         --distribution "$HD" \
43 |                         --verbose \
44 |                         --operations "$NUM_OP" \
45 |                         --keys "$NUM_KEY" \
46 |                         --insert "$INSERTION" \
47 |                         --search "$SEARCH" \
48 |                         --elimination "$ELIMINATION" \
49 |                         --repetitions $REPETITIONS \
50 |                         --outfile "${COUNT}.csv" \
51 |                         "$REGEX"
52 |                     # NOTE(review): COUNT restarts at 0 for every regex, so the same N.csv names are reused across regexes; confirm sepe-runner appends to --outfile rather than overwriting it.
53 |                     COUNT=$((COUNT + 1))
54 |                 done
55 |             done
56 |         done
57 |     done
58 |     mv -v ./*.csv output/
59 |     zip -9 -o "$(date '+%Y-%m-%d_%Hh-%Mm-%Ss')""-histogram-${HD}.zip" -r output/*
60 | done
61 | 


--------------------------------------------------------------------------------
/results/gpt-prompts/url_fixed_pattern.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | User
 4 | For a hash function, assume that keys are URL, always in the format 'http:/google\.github\.io/farmhash/doxygen/html/[a-z0-9]{20}\.html', where all characters are alphanumerical. All characters outside the [a-z0-9]{20} range are constant, so you can ignore them in your hash function. The fixed key size is 70 characters.
 5 | 
 6 | The code is C++, and the keys are std::strings. Do not use std::hash. Assume you do not need to assert key format.
 7 | 
 8 | Produce an optimized hash function for this specific case with an unrolled for loop, and also consider that the constant character is always the same and in the same position.
 9 | 
10 | ChatGPT
11 | Certainly! Here's an example of an optimized hash function in C++ for your specific case using an unrolled for loop. The constant character outside the [a-z0-9]{20} range is ignored, and only the characters within the specified range are considered for the hash calculation:
12 | 
13 | cpp
14 | Copy code
15 | #include <string>
16 | 
17 | std::size_t customHash(const std::string& key) {
18 |     constexpr std::size_t keySize = 70;
19 |     constexpr std::size_t unrollFactor = 5; // Adjust as needed
20 | 
21 |     std::size_t hashValue = 0;
22 | 
23 |     // Unrolled loop for better performance
24 |     for (std::size_t i = 0; i < keySize; i += unrollFactor) {
25 |         hashValue ^= (static_cast<std::size_t>(key[i]) << 0) | 
26 |                      (static_cast<std::size_t>(key[i + 1]) << 8) | 
27 |                      (static_cast<std::size_t>(key[i + 2]) << 16) | 
28 |                      (static_cast<std::size_t>(key[i + 3]) << 24) | 
29 |                      (static_cast<std::size_t>(key[i + 4]) << 32);
30 |     }
31 | 
32 |     return hashValue;
33 | }
34 | In this example, the unrolled loop processes the string in chunks of 5 characters at a time (you can adjust unrollFactor as needed for your specific case). The XOR operation is used to combine the hash values of each chunk. This unrolling can help improve performance by reducing loop control overhead.
35 | 


--------------------------------------------------------------------------------
/src/keyuser/src/google-hashes/citycrc.hpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 Google, Inc.
 2 | //
 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | // of this software and associated documentation files (the "Software"), to deal
 5 | // in the Software without restriction, including without limitation the rights
 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | // copies of the Software, and to permit persons to whom the Software is
 8 | // furnished to do so, subject to the following conditions:
 9 | //
10 | // The above copyright notice and this permission notice shall be included in
11 | // all copies or substantial portions of the Software.
12 | //
13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | // THE SOFTWARE.
20 | //
21 | // CityHash, by Geoff Pike and Jyrki Alakuijala
22 | //
23 | // This file declares the subset of the CityHash functions that require
24 | // _mm_crc32_u64().  See the CityHash README for details.
25 | //
26 | // Functions in the CityHash family are not suitable for cryptography.
27 | 
28 | #ifndef CITY_HASH_CRC_H_
29 | #define CITY_HASH_CRC_H_
30 | 
31 | #include "city.hpp"
32 | // NOTE(review): uint128 and uint64 are not declared in this header; presumably they come from city.hpp.
33 | // Hash function for a byte array: takes the pointer s and the length len, returns a uint128.
34 | uint128 CityHashCrc128(const char *s, size_t len);
35 | 
36 | // Hash function for a byte array.  For convenience, a 128-bit seed is also
37 | // hashed into the result.
38 | uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed);
39 | 
40 | // Hash function for a byte array.  Sets result[0] ... result[3], so result must have room for four uint64 values.
41 | void CityHashCrc256(const char *s, size_t len, uint64 *result);
42 | 
43 | #endif  // CITY_HASH_CRC_H_
44 | 


--------------------------------------------------------------------------------
/src/keyuser/Makefile:
--------------------------------------------------------------------------------
 1 | TARGET=keyuser
 2 | 
 3 | # Directories
 4 | SRC_DIR := src
 5 | OBJ_DIR := obj
 6 | 
 7 | # Find all .cpp and .c files in the project
 8 | SRCS := $(shell find . \( -name "*.cpp" -o -name "*.c" \) -printf "%P\n")
 9 | # Object files live flat in $(OBJ_DIR), named after the source basename.
10 | OBJS := $(shell find . \( -name "*.cpp" -o -name "*.c" \) -exec basename {} \;)
11 | OBJS := $(patsubst %.cpp,$(OBJ_DIR)/%.o,$(patsubst %.c,$(OBJ_DIR)/%.o,$(OBJS)))
12 | # Debug objects mirror OBJS with a debug_ prefix. They are derived from the
13 | # basenames too: deriving them from SRCS yields paths such as
14 | # obj/debug_src/main.o, whose directory is never created.
15 | DEBUG_OBJS := $(patsubst $(OBJ_DIR)/%.o,$(OBJ_DIR)/debug_%.o,$(OBJS))
16 | 
17 | # Make, please look into subdirectories :)
18 | vpath %.cpp $(shell find $(SRC_DIR) -type d)
19 | vpath %.c $(shell find $(SRC_DIR) -type d)
20 | 
21 | # ABSL
22 | ABSL_DIR := $(shell realpath ../../deps/abseil)
23 | ABSL_LIB := $(ABSL_DIR)/lib
24 | ABSL_INC := $(ABSL_DIR)/include
25 | ABSL_LIB_FILES :=-labsl_hash -labsl_city -labsl_low_level_hash
26 | 
27 | # PLATFORM is not set in this file; it is expected from the command line or
28 | # the environment. When it is unset, INTRINSICS stays empty.
29 | ifeq ($(PLATFORM),x86_64)
30 | 	INTRINSICS=-mbmi2 -msse2 -msse3
31 | endif
32 | 
33 | ifeq ($(PLATFORM),aarch64)
34 | 	INTRINSICS=-march=armv8-a+crypto
35 | endif
36 | 
37 | # Compilation variables
38 | CXXFLAGS += -std=c++20 
39 | COMMON_FLAGS  := -Wall -Wextra -pedantic -I$(ABSL_INC) -L$(ABSL_LIB) -I$(SRC_DIR)/gperf-hashes/
40 | RELEASE_FLAGS := $(COMMON_FLAGS) -O2 -pipe -flto=auto -march=native $(INTRINSICS)
41 | DEBUG_FLAGS   := $(COMMON_FLAGS) -Og -g3 -fsanitize=address,undefined $(INTRINSICS)
42 | 
43 | all: $(TARGET)
44 | 
45 | # $(OBJ_DIR) is an order-only prerequisite (after the |): it must exist, but
46 | # its timestamp, which changes whenever an object is written, must not force
47 | # rebuilds.
48 | 
49 | # CXX objects
50 | $(OBJ_DIR)/debug_%.o: %.cpp Makefile | $(OBJ_DIR)
51 | 	$(CXX) $(CXXFLAGS) $(DEBUG_FLAGS)  -c $< -o $@
52 | $(OBJ_DIR)/%.o: %.cpp Makefile | $(OBJ_DIR)
53 | 	$(CXX) $(CXXFLAGS) $(RELEASE_FLAGS) -c $< -o $@
54 | 
55 | # CC objects
56 | $(OBJ_DIR)/debug_%.o: %.c Makefile | $(OBJ_DIR)
57 | 	$(CC) $(CFLAGS) $(DEBUG_FLAGS) -c $< -o $@
58 | $(OBJ_DIR)/%.o: %.c Makefile | $(OBJ_DIR)
59 | 	$(CC) $(CFLAGS) $(RELEASE_FLAGS) -c $< -o $@
60 | 
61 | $(OBJ_DIR):
62 | 	@mkdir -p $(OBJ_DIR)/
63 | 
64 | keyuser-debug: $(DEBUG_OBJS)
65 | 	$(CXX)  $(CXXFLAGS) $(DEBUG_FLAGS) -o $@ $^ $(ABSL_LIB_FILES)
66 | 
67 | keyuser: $(OBJS)
68 | 	$(CXX) $(CXXFLAGS) $(RELEASE_FLAGS) -o $@ $^ $(ABSL_LIB_FILES)
69 | 
70 | clean:
71 | 	rm -vf keyuser keyuser-debug  \
72 | 		$(DEBUG_OBJS) \
73 | 		$(OBJS)
74 | 
75 | .PHONY: all clean
76 | 


--------------------------------------------------------------------------------
/results/gpt-prompts/cpf.txt:
--------------------------------------------------------------------------------
 1 | User
 2 | For a hash function, assume that keys are Brazilian CPF, always in the format 'nnn.nnn.nnn-nn', where all characters are digits. The '-' and '.' characters are constant, so you can ignore them in your hash function. The fixed key size is 14 characters.
 3 | 
 4 | The code is C++, and the keys are std::strings. Do not use std::hash. Assume you do not need to assert key format.
 5 | 
 6 | Produce an optimized hash function for this specific case with an unrolled for loop, and also consider that the constant character is always the same and in the same position.
 7 | 
 8 | ChatGPT
 9 | Certainly! To create an optimized hash function for the given case (Brazilian CPF keys with a fixed format), you can use a simple unrolled loop along with a mix of prime numbers for the hash. Here's an example implementation in C++:
10 | 
11 | cpp
12 | Copy code
13 | #include <string>
14 | 
15 | std::size_t customHash(const std::string& key) {
16 |     const std::size_t prime = 31; // A prime number for hashing
17 | 
18 |     // Unrolled loop for calculating the hash
19 |     std::size_t hashValue = 0;
20 |     hashValue += key[0] * 1000000000000ULL;
21 |     hashValue += key[1] * 100000000000ULL;
22 |     hashValue += key[2] * 10000000000ULL;
23 |     hashValue += key[4] * 1000000000ULL;
24 |     hashValue += key[5] * 100000000ULL;
25 |     hashValue += key[6] * 10000000ULL;
26 |     hashValue += key[8] * 1000000ULL;
27 |     hashValue += key[9] * 100000ULL;
28 |     hashValue += key[10] * 10000ULL;
29 |     hashValue += key[11] * 1000ULL;
30 |     hashValue += key[12] * 100ULL;
31 |     hashValue += key[13] * 10ULL;
32 |     hashValue += key[14] * 1ULL;
33 | 
34 |     // Mixing with a prime number
35 |     hashValue = (hashValue * prime);
36 | 
37 |     return hashValue;
38 | }
39 | 
40 | int main() {
41 |     std::string cpf = "123.456.789-09";
42 |     std::size_t hashValue = customHash(cpf);
43 |     // Use hashValue as needed...
44 |     return 0;
45 | }
46 | This hash function unrolls the loop, considering that the constant characters ('.' and '-') are ignored, and calculates the hash based on the position of each digit in the CPF. The use of prime numbers in the mixing step helps improve the distribution of the hash values. Adjust the prime number and the unrolled loop positions as needed for further optimization.
47 | 


--------------------------------------------------------------------------------
/results/gpt-prompts/ipv4.txt:
--------------------------------------------------------------------------------
 1 | For a hash function, assume that keys are  IPV4, always in the format '123.456.789.123', where all characters are digits. The '.' character is a constant, so you can ignore it in your hash function. The fixed key size is 15 characters. 
 2 | 
 3 | The code is C++, and the keys are std::strings. Do not use std::hash. Assume you do not need to assert key format.
 4 | 
 5 | Produce an optimized hash function for this specific case with an unrolled for loop, and also consider that the constant character is always the same and in the same position.
 6 | ChatGPT
 7 | Certainly! Here's an optimized hash function for the given case in C++ using an unrolled for loop:
 8 | 
 9 | cpp
10 | Copy code
11 | #include <string>
12 | 
13 | // Custom hash function for IPV4 keys
14 | std::size_t customHash(const std::string& key) {
15 |     // Assuming key.size() is always 15
16 |     const char constantChar = '.'; // Constant character
17 | 
18 |     // Unrolled for loop for better performance
19 |     std::size_t hashValue = 0;
20 |     hashValue += static_cast<std::size_t>(key[0]) * 31;
21 |     hashValue += static_cast<std::size_t>(key[1]) * 37;
22 |     hashValue += static_cast<std::size_t>(key[2]) * 41;
23 |     hashValue += static_cast<std::size_t>(key[3]) * 43;
24 |     hashValue += static_cast<std::size_t>(key[4]) * 47;
25 |     hashValue += static_cast<std::size_t>(key[5]) * 53;
26 |     hashValue += static_cast<std::size_t>(key[6]) * 59;
27 |     hashValue += static_cast<std::size_t>(key[7]) * 61;
28 |     hashValue += static_cast<std::size_t>(key[8]) * 67;
29 |     hashValue += static_cast<std::size_t>(key[9]) * 71;
30 |     hashValue += static_cast<std::size_t>(key[10]) * 73;
31 |     hashValue += static_cast<std::size_t>(key[11]) * 79;
32 |     hashValue += static_cast<std::size_t>(key[12]) * 83;
33 |     hashValue += static_cast<std::size_t>(key[13]) * 89;
34 |     hashValue += static_cast<std::size_t>(key[14]) * 97;
35 | 
36 |     return hashValue;
37 | }
38 | 
39 | int main() {
40 |     std::string ipv4Key = "123.456.789.123";
41 |     std::size_t hashValue = customHash(ipv4Key);
42 | 
43 |     // Use hashValue as needed
44 |     return 0;
45 | }
46 | This hash function uses an unrolled for loop to calculate the hash value by considering each character in the key. Adjust the prime numbers in the multiplication to suit your specific needs and ensure a good distribution of hash values.
47 | 
48 | 
49 | 
50 | 
51 | 
52 | 
53 | 


--------------------------------------------------------------------------------
/src/keyuser/src/gperf-hashes/gperf-hashes.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @file gperf-hashes.h
 3 |  * @brief Declarations of hash functions generated by gperf, one per key format, from 1000 keys produced by keygen with its default seed.
 4 |  */
 5 | /* NOTE(review): the include guard ends in _HPP although this file is a .h header. */
 6 | #ifndef GPERF_HASHES_HPP
 7 | #define GPERF_HASHES_HPP
 8 | /* <stdlib.h> is the only include; it supplies size_t for the declarations below. */
 9 | #include <stdlib.h>
10 | 
11 | /**
12 |  * @brief Calculates the hash of a complex-URL key string.
13 |  * 
14 |  * @param str The string to hash.
15 |  * @param len The length of the string.
16 |  * @return The hash of the string, as an unsigned int.
17 |  */
18 | unsigned int
19 | GperfUrlComplexHash (const char *str, size_t len);
20 | 
21 | /**
22 |  * @brief Calculates the hash of a URL key string.
23 |  * 
24 |  * @param str The string to hash.
25 |  * @param len The length of the string.
26 |  * @return The hash of the string, as an unsigned int.
27 |  */
28 | unsigned int
29 | GperfUrlHash (const char *str, size_t len);
30 | 
31 | /**
32 |  * @brief Calculates the hash of a MAC address string.
33 |  * 
34 |  * @param str The string to hash.
35 |  * @param len The length of the string.
36 |  * @return The hash of the string, as an unsigned int.
37 |  */
38 | unsigned int
39 | GperfMACHash (const char *str, size_t len);
40 | 
41 | /**
42 |  * @brief Calculates the hash of a CPF string.
43 |  * 
44 |  * @param str The string to hash.
45 |  * @param len The length of the string.
46 |  * @return The hash of the string, as an unsigned int.
47 |  */
48 | unsigned int
49 | GperfCPFHash (const char *str, size_t len);
50 | 
51 | /**
52 |  * @brief Calculates the hash of an SSN string.
53 |  * 
54 |  * @param str The string to hash.
55 |  * @param len The length of the string.
56 |  * @return The hash of the string, as an unsigned int.
57 |  */
58 | unsigned int
59 | GperfSSNHash (const char *str, size_t len);
60 | 
61 | /**
62 |  * @brief Calculates the hash of an IPV4 address string.
63 |  * 
64 |  * @param str The string to hash.
65 |  * @param len The length of the string.
66 |  * @return The hash of the string, as an unsigned int.
67 |  */
68 | unsigned int
69 | GperfIPV4Hash (const char *str, size_t len);
70 | 
71 | /**
72 |  * @brief Calculates the hash of an IPV6 address string.
73 |  * 
74 |  * @param str The string to hash.
75 |  * @param len The length of the string.
76 |  * @return The hash of the string, as an unsigned int.
77 |  */
78 | unsigned int
79 | GperfIPV6Hash (const char *str, size_t len);
80 | 
81 | /**
82 |  * @brief Calculates the hash of an INTS string.
83 |  * 
84 |  * @param str The string to hash.
85 |  * @param len The length of the string.
86 |  * @return The hash of the string, as an unsigned int.
87 |  */
88 | unsigned int
89 | GperfINTSHash (const char *str, size_t len);
90 | 
91 | #endif


--------------------------------------------------------------------------------
/results/gpt-prompts/url_complex.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | User
 4 | For a hash function, assume that keys are URL, always in the format 'http:/google\.github\.io/[a-z]{10}/version[0-9]{3}/doxygen/html/[a-z0-9]{20}\.html', where all characters are alphanumerical. All characters outside the [a-z]{10}, [0-9]{3}, and  [a-z0-9]{20}  ranges are constant, so you can ignore them in your hash function. The fixed key size is 82 characters.
 5 | 
 6 | The code is C++, and the keys are std::strings. Do not use std::hash. Assume you do not need to assert key format.
 7 | 
 8 | Produce an optimized hash function for this specific case with an unrolled for loop, and also consider that the constant character is always the same and in the same position.
 9 | ChatGPT
10 | Certainly! Here's an optimized hash function for your specific case in C++ with an unrolled loop:
11 | 
12 | cpp
13 | Copy code
14 | #include <string>
15 | 
16 | uint64_t customHash(const std::string& key) {
17 |     // Constants
18 |     const char constantChar = '/';
19 | 
20 |     // Unrolled loop for hash calculation
21 |     uint64_t hash = 0;
22 | 
23 |     // Process [a-z]{10}
24 |     hash ^= (key[7] << 56) | (key[8] << 48) | (key[9] << 40) | (key[10] << 32) |
25 |             (key[11] << 24) | (key[12] << 16) | (key[13] << 8) | key[14];
26 | 
27 |     // Process [0-9]{3}
28 |     hash ^= (key[27] << 56) | (key[28] << 48) | (key[29] << 40);
29 | 
30 |     // Process [a-z0-9]{20}
31 |     hash ^= (key[46] << 56) | (key[47] << 48) | (key[48] << 40) | (key[49] << 32) |
32 |             (key[50] << 24) | (key[51] << 16) | (key[52] << 8) | key[53];
33 | 
34 |     hash ^= (key[54] << 56) | (key[55] << 48) | (key[56] << 40) | (key[57] << 32) |
35 |             (key[58] << 24) | (key[59] << 16) | (key[60] << 8) | key[61];
36 | 
37 |     hash ^= (key[62] << 56) | (key[63] << 48) | (key[64] << 40) | (key[65] << 32) |
38 |             (key[66] << 24) | (key[67] << 16) | (key[68] << 8) | key[69];
39 | 
40 |     hash ^= (key[70] << 56) | (key[71] << 48) | (key[72] << 40) | (key[73] << 32) |
41 |             (key[74] << 24) | (key[75] << 16) | (key[76] << 8) | key[77];
42 | 
43 |     hash ^= (key[78] << 56) | (key[79] << 48) | (key[80] << 40) | (key[81] << 32);
44 | 
45 |     return hash;
46 | }
47 | This code assumes that the constant character is always the same ('/') and in the same position. It unrolls the loop manually, taking advantage of the fixed key size and structure. Note that this is a simple example, and depending on your specific use case, you might need to consider more sophisticated hash functions for better distribution properties.
48 | 


--------------------------------------------------------------------------------
/results/gpt-prompts/ints.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | User
 4 | 
 5 | For a hash function, assume that keys are INTEGERS with a fixed key size of 100 characters.
 6 | 
 7 | The code is C++, and the keys are std::strings. Do not use std::hash. Assume you do not need to assert key format.
 8 | 
 9 | Produce an optimized hash function for this specific case with an unrolled for loop, and also consider that the constant character is always the same and in the same position.
10 | ChatGPT
11 | Certainly! For a hash function that takes integer keys with a fixed size of 100 characters, and the keys are represented as std::strings in C++, you can create a custom hash function. In this example, I'll assume that the constant character is always at position 42 in the string. Below is a simple example of an optimized hash function with an unrolled for loop:
12 | 
13 | cpp
14 | Copy code
15 | #include <string>
16 | 
17 | std::size_t customHash(const std::string& key) {
18 |     constexpr std::size_t keySize = 100;
19 |     constexpr std::size_t unrollFactor = 10; // Unroll the loop by a factor of 10 for optimization
20 | 
21 |     std::size_t hashValue = 0;
22 | 
23 |     // Unrolled loop for better performance
24 |     for (std::size_t i = 0; i < keySize; i += unrollFactor) {
25 |         hashValue ^= (static_cast<std::size_t>(key[i]) << 7) ^ (static_cast<std::size_t>(key[i + 1]) << 14)
26 |                    ^ (static_cast<std::size_t>(key[i + 2]) << 21) ^ (static_cast<std::size_t>(key[i + 3]) << 28)
27 |                    ^ (static_cast<std::size_t>(key[i + 4]) << 35) ^ (static_cast<std::size_t>(key[i + 5]) << 42)
28 |                    ^ (static_cast<std::size_t>(key[i + 6]) << 49) ^ (static_cast<std::size_t>(key[i + 7]) << 56)
29 |                    ^ (static_cast<std::size_t>(key[i + 8]) << 63) ^ (static_cast<std::size_t>(key[i + 9]) << 70);
30 |     }
31 | 
32 |     // Handle the remaining characters in the key (if any)
33 |     for (std::size_t i = (keySize / unrollFactor) * unrollFactor; i < keySize; ++i) {
34 |         hashValue ^= static_cast<std::size_t>(key[i]) << ((i % unrollFactor) * 7);
35 |     }
36 | 
37 |     return hashValue;
38 | }
39 | 
40 | int main() {
41 |     std::string key = "0123456789"; // Replace with your actual key
42 | 
43 |     std::size_t hashValue = customHash(key);
44 | 
45 |     // Use hashValue as needed
46 |     return 0;
47 | }
48 | This example unrolls the loop by a factor of 10, and the XOR operations help combine the hash values of individual characters. You can adjust the unroll factor based on your specific requirements and experiment to find the optimal value for your use case.
49 | 


--------------------------------------------------------------------------------
/results/gpt-prompts/ipv6.txt:
--------------------------------------------------------------------------------
 1 | For a hash function, assume that keys are  IPV6, always in the format '2001:0db8:85a3:0000:0000:8a2e:0370:7334', where all characters are hexadecimal. The ':' character is a constant, so you can ignore it in your hash function. The fixed key size is 39 characters.
 2 | 
 3 | The code is C++, and the keys are std::strings. Do not use std::hash. Assume you do not need to assert key format.
 4 | 
 5 | Produce an optimized hash function for this specific case with an unrolled for loop, and also consider that the constant character is always the same and in the same position.
 6 | ChatGPT
 7 | Certainly! Here's an optimized hash function in C++ for the specified case using an unrolled loop:
 8 | 
 9 | cpp
10 | Copy code
11 | #include <string>
12 | 
13 | // Hash function for IPv6 keys
14 | std::size_t hashIPv6(const std::string& key) {
15 |     // Assuming key is always in the format '2001:0db8:85a3:0000:0000:8a2e:0370:7334'
16 |     // and ':' is ignored in the hash computation
17 | 
18 |     // Unrolled loop for optimized performance
19 |     std::size_t hashValue = 0;
20 | 
21 |     // Process each segment (assuming 8 segments separated by ':')
22 |     hashValue ^= (std::size_t(key[0]) << 24) | (std::size_t(key[1]) << 16) | (std::size_t(key[2]) << 8) | std::size_t(key[3]);
23 |     hashValue ^= (std::size_t(key[5]) << 24) | (std::size_t(key[6]) << 16) | (std::size_t(key[7]) << 8) | std::size_t(key[8]);
24 |     hashValue ^= (std::size_t(key[10]) << 24) | (std::size_t(key[11]) << 16) | (std::size_t(key[12]) << 8) | std::size_t(key[13]);
25 |     hashValue ^= (std::size_t(key[15]) << 24) | (std::size_t(key[16]) << 16) | (std::size_t(key[17]) << 8) | std::size_t(key[18]);
26 |     hashValue ^= (std::size_t(key[20]) << 24) | (std::size_t(key[21]) << 16) | (std::size_t(key[22]) << 8) | std::size_t(key[23]);
27 |     hashValue ^= (std::size_t(key[25]) << 24) | (std::size_t(key[26]) << 16) | (std::size_t(key[27]) << 8) | std::size_t(key[28]);
28 |     hashValue ^= (std::size_t(key[30]) << 24) | (std::size_t(key[31]) << 16) | (std::size_t(key[32]) << 8) | std::size_t(key[33]);
29 |     hashValue ^= (std::size_t(key[35]) << 24) | (std::size_t(key[36]) << 16) | (std::size_t(key[37]) << 8) | std::size_t(key[38]);
30 | 
31 |     return hashValue;
32 | }
33 | 
34 | int main() {
35 |     std::string ipv6Key = "2001:0db8:85a3:0000:0000:8a2e:0370:7334";
36 |     std::size_t hashValue = hashIPv6(ipv6Key);
37 |     // Use hashValue as needed
38 |     return 0;
39 | }
40 | This code unrolls the loop to process each segment of the IPv6 key. It XORs the values of each segment, assuming that the ':' character is ignored in the hash computation. This unrolling can lead to better performance, especially when dealing with a fixed-size input like an IPv6 key.
41 | 
42 | 


--------------------------------------------------------------------------------
/scripts/benchmark.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | #
 3 | # Builds the project and its benchmark target, then runs ./bin/sepe-runner for
 4 | # every regex name in Regexes.toml across all distributions, operation counts,
 5 | # key counts and operation mixes configured below. The sweep is executed twice:
 6 | # once as-is and once with --hash-performance. After each sweep the CSV files
 7 | # in the working directory are moved to output/ and output/ is zipped.
 8 | 
 9 | set -e
10 | 
11 | # Allow the script to be started from scripts/ as well as from the repo root.
12 | if [ "$(basename "$(pwd)")" = "scripts" ]; then
13 |         cd ..
14 | fi
15 | 
16 | mkdir -p output
17 | 
18 | make -j"$(nproc)"
19 | make -j"$(nproc)" benchmark
20 | 
21 | DISTRIBUTIONS="normal uniform"
22 | NUM_OPS=10000
23 | NUM_KEYS="500 2000 10000"
24 | # Names of the [section] headers in Regexes.toml.
25 | REGEXES="$(sed  -n 's/^\[\(.*\)\]/\1/p' Regexes.toml)"
26 | REPETITIONS=10
27 | # One operation mix per line: insert, search and elimination fractions.
28 | PERCENTAGES="
29 | 0.7 0.2 0.1
30 | 0.6 0.2 0.2
31 | 0.4 0.3 0.3
32 | "
33 | PERCENTAGES_COUNT="$(echo "$PERCENTAGES" | wc -w)"
34 | 
35 | # nth_percentage N
36 | # Prints the N-th (1-based) whitespace-separated value of PERCENTAGES.
37 | nth_percentage() {
38 |         echo "$PERCENTAGES" | tr '\n' ' ' | awk "{print \$$1}"
39 | }
40 | 
41 | # run_sweep [EXTRA_FLAG...]
42 | # Runs sepe-runner once per regex/distribution/operations/keys/mix combination.
43 | # Any arguments are forwarded to sepe-runner right after --verbose.
44 | # COUNT restarts at 0 for every regex, so the --outfile names repeat per regex.
45 | # NOTE(review): presumably sepe-runner makes the final file name unique per
46 | # regex; confirm, otherwise later regexes overwrite earlier CSVs.
47 | run_sweep() {
48 |         for REGEX in $REGEXES; do
49 |                 COUNT=0
50 |                 for DISTRIBUTION in $DISTRIBUTIONS; do
51 |                         for NUM_OP in $NUM_OPS; do
52 |                                 for NUM_KEY in $NUM_KEYS; do
53 |                                         # PERCENTAGES is consumed three values at a time.
54 |                                         for ARG in $(seq 1 3 "$PERCENTAGES_COUNT"); do
55 |                                                 INSERTION="$(nth_percentage "$((ARG + 0))")"
56 |                                                 SEARCH="$(nth_percentage "$((ARG + 1))")"
57 |                                                 ELIMINATION="$(nth_percentage "$((ARG + 2))")"
58 | 
59 |                                                 ./bin/sepe-runner \
60 |                                                         --verbose \
61 |                                                         "$@" \
62 |                                                         --distribution "$DISTRIBUTION" \
63 |                                                         --operations "$NUM_OP" \
64 |                                                         --keys "$NUM_KEY" \
65 |                                                         --insert "$INSERTION" \
66 |                                                         --search "$SEARCH" \
67 |                                                         --elimination "$ELIMINATION" \
68 |                                                         --repetitions "$REPETITIONS" \
69 |                                                         --outfile "${COUNT}.csv" \
70 |                                                         "$REGEX"
71 | 
72 |                                                 COUNT=$((COUNT + 1))
73 |                                         done
74 |                                 done
75 |                         done
76 |                 done
77 |         done
78 | }
79 | 
80 | run_sweep
81 | mv -v ./*.csv output/
82 | zip -9 -o RQ1_RQ2.zip -r output/*
83 | 
84 | run_sweep --hash-performance
85 | mv -v ./*.csv output/
86 | zip -9 -o RQ1_RQ2_hash-performance.zip -r output/*


--------------------------------------------------------------------------------
/results/gpt-prompts/mac.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | User
 4 | For a hash function, assume that keys are MAC, always in the format 'XX:XX:XX:XX:XX:XX', where all characters are hexadecimal. The ':' character is constant, so you can ignore them in your hash function. The fixed key size is 17 characters.
 5 | 
 6 | The code is C++, and the keys are std::strings. Do not use std::hash. Assume you do not need to assert key format.
 7 | 
 8 | Produce an optimized hash function for this specific case with an unrolled for loop, and also consider that the constant character is always the same and in the same position.
 9 | ChatGPT
10 | Certainly! Here's an example of an optimized hash function in C++ for the given scenario using an unrolled loop:
11 | 
12 | cpp
13 | Copy code
14 | #include <string>
15 | 
16 | std::size_t customHash(const std::string& key) {
17 |     // Assuming the key is always in the format 'XX:XX:XX:XX:XX:XX'
18 |     // where the ':' character is at positions 2, 5, 8, 11, 14
19 |     // and ignoring those positions while hashing
20 | 
21 |     const char constantChar = ':'; // The constant character
22 | 
23 |     // Unrolled loop for hash calculation
24 |     std::size_t hashValue = 0;
25 | 
26 |     hashValue ^= static_cast<std::size_t>(key[0]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2);
27 |     hashValue ^= static_cast<std::size_t>(key[1]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2);
28 | 
29 |     hashValue ^= static_cast<std::size_t>(key[3]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2);
30 |     hashValue ^= static_cast<std::size_t>(key[4]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2);
31 | 
32 |     hashValue ^= static_cast<std::size_t>(key[6]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2);
33 |     hashValue ^= static_cast<std::size_t>(key[7]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2);
34 | 
35 |     hashValue ^= static_cast<std::size_t>(key[9]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2);
36 |     hashValue ^= static_cast<std::size_t>(key[10]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2);
37 | 
38 |     hashValue ^= static_cast<std::size_t>(key[12]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2);
39 |     hashValue ^= static_cast<std::size_t>(key[13]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2);
40 | 
41 |     hashValue ^= static_cast<std::size_t>(key[15]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2);
42 |     hashValue ^= static_cast<std::size_t>(key[16]) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2);
43 | 
44 |     // Combine with constant character
45 |     hashValue ^= static_cast<std::size_t>(constantChar) + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2);
46 | 
47 |     return hashValue;
48 | }
49 | 
50 | int main() {
51 |     std::string key = "1A:2B:3C:4D:5E:6F";
52 |     std::size_t hashValue = customHash(key);
53 |     // Use hashValue as needed
54 |     return 0;
55 | }
56 | This hash function uses the Jenkins hash algorithm, and it's unrolled to potentially improve performance. Note that this is just one example, and the actual performance gains may depend on the specific compiler and platform. Adjustments may be needed based on your specific requirements and testing.
57 | 


--------------------------------------------------------------------------------
/src/keyuser/src/registry.hpp:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @file registry.hpp
 3 |  * @brief This file contains macros for registering benchmarks.
 4 |  * @note New custom hash functions must be manually added to the REGISTER_ALL_BENCHMARKS macro.
 5 |  */
 6 | 
 7 | /**
 8 |  * @def DECLARE_ONE_BENCH(name, hashname)
 9 |  * @brief Expands to a `new` expression that heap-allocates a `name<hashname>` object, passing the stringified `name` and `hashname` to its constructor.
10 |  * @param name The benchmark class template to instantiate (e.g. UnorderedMapBench).
11 |  * @param hashname The type supplied as the template argument; its spelling also becomes the second constructor argument.
12 |  * @return The new object, C-style cast to `Benchmark*`. Nothing in this header deletes it.
13 |  */
14 | #define DECLARE_ONE_BENCH(name, hashname) (Benchmark*)new name<hashname>(#name,#hashname)
15 | 
16 | /**
17 |  * @def REGISTER_BENCHMARKS(hashname)
18 |  * @brief Appends four benchmarks for `hashname` (UnorderedMapBench, UnorderedMultiMapBench, UnorderedSetBench, UnorderedMultisetBench) to a variable named `benchmarks`, which must be in scope at the expansion site.
19 |  * @param hashname The hash type passed through to DECLARE_ONE_BENCH. The expansion is four separate statements (not wrapped in do/while), so do not use it as the body of an unbraced `if`/`for`.
20 |  */
21 | #define REGISTER_BENCHMARKS(hashname)   benchmarks.push_back(DECLARE_ONE_BENCH(UnorderedMapBench, hashname)); \
22 |                                         benchmarks.push_back(DECLARE_ONE_BENCH(UnorderedMultiMapBench, hashname)); \
23 |                                         benchmarks.push_back(DECLARE_ONE_BENCH(UnorderedSetBench, hashname)); \
24 |                                         benchmarks.push_back(DECLARE_ONE_BENCH(UnorderedMultisetBench, hashname));
25 | 
26 | /**
27 |  * @def REGISTER_ALL_BENCHMARKS
28 |  * @brief Expands REGISTER_BENCHMARKS once for every hash type listed below, so each listed hash gets all four container benchmarks. A hash missing from this list is not registered by this macro.
29 |  */
30 | #define REGISTER_ALL_BENCHMARKS \
31 |     REGISTER_BENCHMARKS(AbseilHash); \
32 |     REGISTER_BENCHMARKS(STDHashBin); \
33 |     REGISTER_BENCHMARKS(STDHashSrc); \
34 |     REGISTER_BENCHMARKS(FNVHash); \
35 |     REGISTER_BENCHMARKS(CityHash); \
36 |     REGISTER_BENCHMARKS(IPV4HashUnrolled); \
37 |     REGISTER_BENCHMARKS(IPV4HashMove); \
38 |     REGISTER_BENCHMARKS(IntSimdHash); \
39 |     REGISTER_BENCHMARKS(PextUrlComplex); \
40 |     REGISTER_BENCHMARKS(PextUrl); \
41 |     REGISTER_BENCHMARKS(PextMac); \
42 |     REGISTER_BENCHMARKS(PextCPF); \
43 |     REGISTER_BENCHMARKS(PextSSN); \
44 |     REGISTER_BENCHMARKS(PextIPV4); \
45 |     REGISTER_BENCHMARKS(PextIPV6); \
46 |     REGISTER_BENCHMARKS(PextINTS); \
47 |     REGISTER_BENCHMARKS(OffXorUrlComplex); \
48 |     REGISTER_BENCHMARKS(OffXorUrl); \
49 |     REGISTER_BENCHMARKS(OffXorMac); \
50 |     REGISTER_BENCHMARKS(OffXorCPF); \
51 |     REGISTER_BENCHMARKS(OffXorSSN); \
52 |     REGISTER_BENCHMARKS(OffXorIPV4); \
53 |     REGISTER_BENCHMARKS(OffXorIPV6); \
54 |     REGISTER_BENCHMARKS(OffXorINTS); \
55 |     REGISTER_BENCHMARKS(NaiveUrlComplex); \
56 |     REGISTER_BENCHMARKS(NaiveUrl); \
57 |     REGISTER_BENCHMARKS(NaiveMac); \
58 |     REGISTER_BENCHMARKS(NaiveCPF); \
59 |     REGISTER_BENCHMARKS(NaiveSSN); \
60 |     REGISTER_BENCHMARKS(NaiveIPV4); \
61 |     REGISTER_BENCHMARKS(NaiveIPV6); \
62 |     REGISTER_BENCHMARKS(NaiveINTS); \
63 |     REGISTER_BENCHMARKS(AesUrlComplex); \
64 |     REGISTER_BENCHMARKS(AesUrl); \
65 |     REGISTER_BENCHMARKS(AesMac); \
66 |     REGISTER_BENCHMARKS(AesCPF); \
67 |     REGISTER_BENCHMARKS(AesSSN); \
68 |     REGISTER_BENCHMARKS(AesIPV4); \
69 |     REGISTER_BENCHMARKS(AesIPV6); \
70 |     REGISTER_BENCHMARKS(AesINTS); \
71 |     REGISTER_BENCHMARKS(NaiveSimdUrlComplex); \
72 |     REGISTER_BENCHMARKS(NaiveSimdUrl); \
73 |     REGISTER_BENCHMARKS(NaiveSimdIPV6); \
74 |     REGISTER_BENCHMARKS(NaiveSimdINTS); \
75 |     REGISTER_BENCHMARKS(GptUrlComplex); \
76 |     REGISTER_BENCHMARKS(GptUrl); \
77 |     REGISTER_BENCHMARKS(GptMac); \
78 |     REGISTER_BENCHMARKS(GptCPF); \
79 |     REGISTER_BENCHMARKS(GptSSN); \
80 |     REGISTER_BENCHMARKS(GptIPV4); \
81 |     REGISTER_BENCHMARKS(GptIPV6); \
82 |     REGISTER_BENCHMARKS(GptINTS); \
83 |     REGISTER_BENCHMARKS(GperfUrlComplex); \
84 |     REGISTER_BENCHMARKS(GperfUrl); \
85 |     REGISTER_BENCHMARKS(GperfMac); \
86 |     REGISTER_BENCHMARKS(GperfCPF); \
87 |     REGISTER_BENCHMARKS(GperfSSN); \
88 |     REGISTER_BENCHMARKS(GperfIPV4); \
89 |     REGISTER_BENCHMARKS(GperfIPV6); \
90 |     REGISTER_BENCHMARKS(GperfINTS); \
91 |     REGISTER_BENCHMARKS(PextMurmurUrlComplex); \
92 |     REGISTER_BENCHMARKS(PextMurmurINTS);
93 | 


--------------------------------------------------------------------------------
/src/keyuser/src/gperf-hashes/url_fixed_pattern.c:
--------------------------------------------------------------------------------
 1 | /* ANSI-C code produced by gperf version 3.1 */
 2 | /* Command-line: ./bin/gperf input.txt  */
 3 | /* Computed positions: -k'46-47,60,62' */
 4 | 
 5 | #include "gperf-hashes.h"
 6 | 
 7 | #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
 8 |       && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
 9 |       && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
10 |       && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
11 |       && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
12 |       && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
13 |       && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
14 |       && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
15 |       && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
16 |       && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
17 |       && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
18 |       && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
19 |       && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
20 |       && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
21 |       && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
22 |       && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
23 |       && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
24 |       && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
25 |       && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
26 |       && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
27 |       && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
28 |       && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
29 |       && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
30 | /* The character set is not based on ISO-646.  */
31 | #error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gperf@gnu.org>."
32 | #endif
33 | 
34 | 
35 | #define TOTAL_KEYWORDS 1000
36 | #define MIN_WORD_LENGTH 70
37 | #define MAX_WORD_LENGTH 70
38 | #define MIN_HASH_VALUE 20
39 | #define MAX_HASH_VALUE 7027
40 | /* maximum key range = 7008, duplicates = 0 */
41 | 
42 | #ifdef __GNUC__
43 | __inline
44 | #else
45 | #ifdef __cplusplus
46 | inline
47 | #endif
48 | #endif
49 | /*ARGSUSED*/ /* gperf-generated hash for the 70-character keys described by MIN/MAX_WORD_LENGTH above; len is never read. */
50 | unsigned int
51 | GperfUrlHash (const char *str, size_t len) /* str[61] is read without a bounds check, so at least 62 bytes of str must be readable */
52 | {
53 |   static unsigned short asso_values[] = /* 261 entries: large enough for the biggest index used below, 255 + 5 */
54 |     {
55 |       7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028,
56 |       7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028,
57 |       7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028,
58 |       7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028,
59 |       7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028,  464, 1954,
60 |        801, 1634,  576,    6,   30,  325,  110,   85, 1199, 1934,
61 |        793, 1864,  825, 7028, 7028, 7028, 7028, 7028, 7028, 7028,
62 |       7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028,
63 |       7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028,
64 |       7028, 7028, 7028, 7028, 7028, 7028, 7028,  471,  488,  513,
65 |       1831,  292,   15, 1015,  506,   20,  435,  506,   55,  575,
66 |        900,    5,   10,  290,    5,    0,   60,  180,   80,  745,
67 |        136,    1,  365, 1799,  107, 2363,  936,  212, 7028, 7028,
68 |       7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028,
69 |       7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028,
70 |       7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028,
71 |       7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028,
72 |       7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028,
73 |       7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028,
74 |       7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028,
75 |       7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028,
76 |       7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028,
77 |       7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028,
78 |       7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028,
79 |       7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028,
80 |       7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028, 7028,
81 |       7028
82 |     };
83 |   return asso_values[(unsigned char)str[61]+5] + asso_values[(unsigned char)str[59]+4] + asso_values[(unsigned char)str[46]] + asso_values[(unsigned char)str[45]+1]; /* 0-based offsets 61, 59, 46, 45 = the 1-based computed positions 62, 60, 47, 46 in the file header */
84 | }
85 | 
86 | 


--------------------------------------------------------------------------------
/src/keyuser/src/gperf-hashes/cpf.c:
--------------------------------------------------------------------------------
 1 | /* ANSI-C code produced by gperf version 3.1 */
 2 | /* Command-line: ./bin/gperf  */
 3 | /* Computed positions: -k'1,5,13-14' */
 4 | 
 5 | #include "gperf-hashes.h"
 6 | 
 7 | #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
 8 |       && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
 9 |       && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
10 |       && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
11 |       && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
12 |       && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
13 |       && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
14 |       && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
15 |       && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
16 |       && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
17 |       && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
18 |       && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
19 |       && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
20 |       && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
21 |       && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
22 |       && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
23 |       && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
24 |       && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
25 |       && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
26 |       && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
27 |       && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
28 |       && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
29 |       && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
30 | /* The character set is not based on ISO-646.  */
31 | #error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gperf@gnu.org>."
32 | #endif
33 | 
34 | 
35 | #define TOTAL_KEYWORDS 1000
36 | #define MIN_WORD_LENGTH 17
37 | #define MAX_WORD_LENGTH 17
38 | #define MIN_HASH_VALUE 15
39 | #define MAX_HASH_VALUE 6416
40 | /* maximum key range = 6402, duplicates = 0 */
41 | 
42 | #ifdef __GNUC__
43 | __inline
44 | #else
45 | #ifdef __cplusplus
46 | inline
47 | #endif
48 | #endif
49 | /*ARGSUSED*/ /* len is never read. NOTE(review): the table, constants and offsets here are identical to GperfMACHash in mac.c, and MIN/MAX_WORD_LENGTH above is 17 - confirm this file was really generated from CPF keys. */
50 | unsigned int
51 | GperfCPFHash (const char *str, size_t len) /* str[13] is read without a bounds check, so at least 14 bytes of str must be readable */
52 | {
53 |   static unsigned short asso_values[] = /* 268 entries: large enough for the biggest index used below, 255 + 12 */
54 |     {
55 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
56 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
57 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
58 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
59 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 1494,  825,
60 |         41,  206,  865,  435,    0,   10,   60,   25,  475, 1694,
61 |         80,  405,  665,   45,   27,  300,   15,    5,    0,   30,
62 |        150,  175,   71,  986,  667, 1429, 1796,  766,  900,  371,
63 |       1934,  987, 1754, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
64 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 1439,   86,    1,
65 |        475,  205,  290,  950, 1886, 1986,   26, 2123,  923,  332,
66 |       1748,   96,  342, 1593,  857, 6417, 6417, 6417, 6417, 6417,
67 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
68 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
69 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
70 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
71 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
72 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
73 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
74 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
75 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
76 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
77 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
78 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
79 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
80 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
81 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417
82 |     };
83 |   return asso_values[(unsigned char)str[13]+1] + asso_values[(unsigned char)str[12]] + asso_values[(unsigned char)str[4]+6] + asso_values[(unsigned char)str[0]+12]; /* 0-based offsets 13, 12, 4, 0 = the 1-based computed positions 14, 13, 5, 1 in the file header */
84 | }
85 | 
86 | 


--------------------------------------------------------------------------------
/src/keyuser/src/gperf-hashes/mac.c:
--------------------------------------------------------------------------------
 1 | /* ANSI-C code produced by gperf version 3.1 */
 2 | /* Command-line: ./bin/gperf  */
 3 | /* Computed positions: -k'1,5,13-14' */
 4 | 
 5 | #include "gperf-hashes.h"
 6 | 
 7 | #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
 8 |       && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
 9 |       && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
10 |       && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
11 |       && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
12 |       && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
13 |       && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
14 |       && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
15 |       && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
16 |       && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
17 |       && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
18 |       && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
19 |       && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
20 |       && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
21 |       && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
22 |       && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
23 |       && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
24 |       && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
25 |       && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
26 |       && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
27 |       && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
28 |       && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
29 |       && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
30 | /* The character set is not based on ISO-646.  */
31 | #error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gperf@gnu.org>."
32 | #endif
33 | 
34 | 
35 | #define TOTAL_KEYWORDS 1000
36 | #define MIN_WORD_LENGTH 17
37 | #define MAX_WORD_LENGTH 17
38 | #define MIN_HASH_VALUE 15
39 | #define MAX_HASH_VALUE 6416
40 | /* maximum key range = 6402, duplicates = 0 */
41 | 
42 | #ifdef __GNUC__
43 | __inline
44 | #else
45 | #ifdef __cplusplus
46 | inline
47 | #endif
48 | #endif
49 | /*ARGSUSED*/ /* gperf-generated hash for the 17-character keys described by MIN/MAX_WORD_LENGTH above; len is never read. */
50 | unsigned int
51 | GperfMACHash (const char *str, size_t len) /* str[13] is read without a bounds check, so at least 14 bytes of str must be readable */
52 | {
53 |   static unsigned short asso_values[] = /* 268 entries: large enough for the biggest index used below, 255 + 12 */
54 |     {
55 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
56 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
57 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
58 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
59 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 1494,  825,
60 |         41,  206,  865,  435,    0,   10,   60,   25,  475, 1694,
61 |         80,  405,  665,   45,   27,  300,   15,    5,    0,   30,
62 |        150,  175,   71,  986,  667, 1429, 1796,  766,  900,  371,
63 |       1934,  987, 1754, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
64 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 1439,   86,    1,
65 |        475,  205,  290,  950, 1886, 1986,   26, 2123,  923,  332,
66 |       1748,   96,  342, 1593,  857, 6417, 6417, 6417, 6417, 6417,
67 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
68 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
69 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
70 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
71 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
72 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
73 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
74 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
75 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
76 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
77 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
78 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
79 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
80 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417,
81 |       6417, 6417, 6417, 6417, 6417, 6417, 6417, 6417
82 |     };
83 |   return asso_values[(unsigned char)str[13]+1] + asso_values[(unsigned char)str[12]] + asso_values[(unsigned char)str[4]+6] + asso_values[(unsigned char)str[0]+12]; /* 0-based offsets 13, 12, 4, 0 = the 1-based computed positions 14, 13, 5, 1 in the file header */
84 | }
85 | 
86 | 


--------------------------------------------------------------------------------
/src/keyuser/src/gperf-hashes/ipv6.c:
--------------------------------------------------------------------------------
 1 | /* ANSI-C code produced by gperf version 3.1 */
 2 | /* Command-line: ./bin/gperf  */
 3 | /* Computed positions: -k'1,4,9,28' */
 4 | 
 5 | #include "gperf-hashes.h"
 6 | 
 7 | #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
 8 |       && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
 9 |       && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
10 |       && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
11 |       && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
12 |       && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
13 |       && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
14 |       && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
15 |       && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
16 |       && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
17 |       && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
18 |       && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
19 |       && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
20 |       && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
21 |       && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
22 |       && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
23 |       && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
24 |       && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
25 |       && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
26 |       && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
27 |       && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
28 |       && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
29 |       && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
30 | /* The character set is not based on ISO-646.  */
31 | #error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gperf@gnu.org>."
32 | #endif
33 | 
34 | 
35 | #define TOTAL_KEYWORDS 1000
36 | #define MIN_WORD_LENGTH 39
37 | #define MAX_WORD_LENGTH 39
38 | #define MIN_HASH_VALUE 10
39 | #define MAX_HASH_VALUE 5736
40 | /* maximum key range = 5727, duplicates = 0 */
41 | 
/*
 * gperf-generated hash for the 39-byte keys this table was built from
 * (see MIN_WORD_LENGTH / MAX_WORD_LENGTH above).
 *
 * str: key bytes; indices 0, 3, 8 and 27 are read unconditionally, so at
 *      least 28 bytes must be readable.
 * len: ignored; it is kept only so every hash shares the same signature.
 *
 * Returns the sum of four table lookups, one per inspected byte. Each lookup
 * adds a per-position offset to the byte value, which is why the table is
 * padded beyond 256 entries: the largest index (255 + 14) stays in bounds.
 *
 * The table is never written, so it is declared const. That keeps it in
 * read-only storage and is also required by C99 6.7.4p3, which forbids a
 * modifiable static object inside a non-static inline function.
 */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/*ARGSUSED*/
unsigned int
GperfIPV6Hash (const char *str, size_t len)
{
  static const unsigned short asso_values[] =
    {
      5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737,
      5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737,
      5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737,
      5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737,
      5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 1439,   12,
       136,  132,  835, 1314,  190,   20,   55,  230, 1000,  430,
       615,   10,    5,  165,   60,    0,  265,   25,    0,   30,
        15,  450, 1023,  995,  751,  960,  230, 1969,   96,  510,
       200,  188, 2048,   86,   46, 5737, 5737, 5737, 5737, 5737,
      5737, 5737, 5737, 5737, 5737, 5737, 5737,  920,  882,  937,
      1826,  244,  501, 1834, 1746,  557, 1579,  115,    1,  721,
       961,    5,  700,   57, 1506,  890,  362, 5737, 5737, 5737,
      5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737,
      5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737,
      5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737,
      5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737,
      5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737,
      5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737,
      5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737,
      5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737,
      5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737,
      5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737,
      5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737,
      5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737,
      5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737,
      5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737,
      5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737, 5737
    };
  /* One lookup per computed key position; offsets decorrelate positions. */
  return asso_values[(unsigned char)str[27]+10]
       + asso_values[(unsigned char)str[8]+6]
       + asso_values[(unsigned char)str[3]]
       + asso_values[(unsigned char)str[0]+14];
}
85 | 
86 | 


--------------------------------------------------------------------------------
/src/keyuser/src/gperf-hashes/url_complex.c:
--------------------------------------------------------------------------------
 1 | /* ANSI-C code produced by gperf version 3.1 */
 2 | /* Command-line: ./bin/gperf input.txt  */
 3 | /* Computed positions: -k'24,59-61' */
 4 | 
 5 | #include "gperf-hashes.h"
 6 | 
 7 | #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
 8 |       && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
 9 |       && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
10 |       && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
11 |       && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
12 |       && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
13 |       && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
14 |       && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
15 |       && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
16 |       && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
17 |       && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
18 |       && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
19 |       && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
20 |       && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
21 |       && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
22 |       && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
23 |       && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
24 |       && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
25 |       && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
26 |       && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
27 |       && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
28 |       && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
29 |       && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
30 | /* The character set is not based on ISO-646.  */
31 | #error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gperf@gnu.org>."
32 | #endif
33 | 
34 | 
35 | #define TOTAL_KEYWORDS 1000
36 | #define MIN_WORD_LENGTH 83
37 | #define MAX_WORD_LENGTH 83
38 | #define MIN_HASH_VALUE 25
39 | #define MAX_HASH_VALUE 4734
40 | /* maximum key range = 4710, duplicates = 0 */
41 | 
/*
 * gperf-generated hash for the 83-byte keys this table was built from
 * (see MIN_WORD_LENGTH / MAX_WORD_LENGTH above).
 *
 * str: key bytes; indices 23, 58, 59 and 60 are read unconditionally, so at
 *      least 61 bytes must be readable.
 * len: ignored; it is kept only so every hash shares the same signature.
 *
 * Returns the sum of four table lookups, one per inspected byte. Only the
 * lookup for str[60] adds an offset (+11), which is why the table is padded
 * beyond 256 entries: the largest index (255 + 11) stays in bounds.
 *
 * The table is never written, so it is declared const. That keeps it in
 * read-only storage and is also required by C99 6.7.4p3, which forbids a
 * modifiable static object inside a non-static inline function.
 */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/*ARGSUSED*/
unsigned int
GperfUrlComplexHash (const char *str, size_t len)
{
  static const unsigned short asso_values[] =
    {
      4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735,
      4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735,
      4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735,
      4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735,
      4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735,  909,   17,
       955,  652,   17,  146,  181,  386,  726,  267, 4735,   67,
       222, 1016,   42, 1449,  991, 1499,  591, 1699, 2044, 4735,
      4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735,
      4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735,
      4735, 4735, 4735, 4735, 4735, 4735, 4735,  121,  575,  730,
       880,  985,   16,  315,  111,  245,  930,    1,  180,   30,
        60,   45,    0,  460,  215,   40,   15,    5,   25,    0,
       125,  535,    5, 1018, 1476, 1854, 1869,  937, 1394, 1609,
       987,  867, 1789,   51, 4735, 4735, 4735, 4735, 4735, 4735,
      4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735,
      4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735,
      4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735,
      4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735,
      4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735,
      4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735,
      4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735,
      4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735,
      4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735,
      4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735,
      4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735,
      4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735, 4735,
      4735, 4735, 4735, 4735, 4735, 4735, 4735
    };
  /* One lookup per computed key position. */
  return asso_values[(unsigned char)str[60]+11]
       + asso_values[(unsigned char)str[59]]
       + asso_values[(unsigned char)str[58]]
       + asso_values[(unsigned char)str[23]];
}
85 | 
86 | 


--------------------------------------------------------------------------------
/src/keyuser/src/gperf-hashes/ipv4.c:
--------------------------------------------------------------------------------
 1 | /* ANSI-C code produced by gperf version 3.1 */
 2 | /* Command-line: ./bin/gperf  */
 3 | /* Computed positions: -k'8,14,22' */
 4 | 
 5 | #include "gperf-hashes.h"
 6 | 
 7 | #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
 8 |       && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
 9 |       && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
10 |       && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
11 |       && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
12 |       && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
13 |       && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
14 |       && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
15 |       && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
16 |       && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
17 |       && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
18 |       && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
19 |       && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
20 |       && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
21 |       && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
22 |       && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
23 |       && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
24 |       && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
25 |       && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
26 |       && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
27 |       && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
28 |       && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
29 |       && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
30 | /* The character set is not based on ISO-646.  */
31 | #error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gperf@gnu.org>."
32 | #endif
33 | 
34 | 
35 | #define TOTAL_KEYWORDS 1000
36 | #define MIN_WORD_LENGTH 25
37 | #define MAX_WORD_LENGTH 27
38 | #define MIN_HASH_VALUE 27
39 | #define MAX_HASH_VALUE 3126
40 | /* maximum key range = 3100, duplicates = 0 */
41 | 
/*
 * gperf-generated hash for keys of 25 to 27 bytes (see MIN_WORD_LENGTH /
 * MAX_WORD_LENGTH above).
 *
 * str: key bytes; indices 7, 13 and 21 are read unconditionally, so at
 *      least 22 bytes must be readable.
 * len: key length; unlike the fixed-length hashes it is part of the result.
 *
 * Returns len plus three table lookups, one per inspected byte. The lookup
 * for str[7] adds an offset (+23), which is why the table is padded beyond
 * 256 entries: the largest index (255 + 23) stays in bounds.
 *
 * The table is never written, so it is declared const. That keeps it in
 * read-only storage and is also required by C99 6.7.4p3, which forbids a
 * modifiable static object inside a non-static inline function.
 */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif

unsigned int
GperfIPV4Hash (const char *str, size_t len)
{
  static const unsigned short asso_values[] =
    {
      3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127,
      3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127,
      3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127,
      3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127,
      3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127,
      3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127,
      3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127,
      3127,  464,   75,    0,  942,   55,   20,  350,  100,  820,
       395, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127,
      3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127,
      3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127,
      3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127,
      3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127,  940,  862,
      1000,  126,   11,  376,  700,  281,  162,  860,  181,  980,
        22,  147,  416,  561,   16,  951,  516,  171,  567,   26,
       781,  315,   70,   15,    6,  751,    0,    5,    0,  211,
        80,  370,  115,    0,  240,  795,  465,   95,   40,   20,
       285,  185,  630,  620,  570,   10, 1015,  545,  585,  165,
        30,  196,  305,  510,  560,   80,  345,   25,   70,  850,
        15,  135,  636,  772,  741,   92,  977,  420, 1669,  151,
        42,  803,  662,  986,   58,  588,  792, 1444, 1074,  172,
       102,  360,   72,  672,  846, 3127, 3127, 3127, 3127, 3127,
      3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127,
      3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127,
      3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127,
      3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127,
      3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127,
      3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127, 3127
    };
  /* Key length plus one lookup per computed key position. */
  return len
       + asso_values[(unsigned char)str[21]]
       + asso_values[(unsigned char)str[13]]
       + asso_values[(unsigned char)str[7]+23];
}
86 | 
87 | 


--------------------------------------------------------------------------------
/src/keyuser/src/gperf-hashes/ssn.c:
--------------------------------------------------------------------------------
 1 | /* ANSI-C code produced by gperf version 3.1 */
 2 | /* Command-line: ./bin/gperf  */
 3 | /* Computed positions: -k'1-3,5-6,10' */
 4 | 
 5 | #include "gperf-hashes.h"
 6 | 
 7 | #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
 8 |       && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
 9 |       && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
10 |       && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
11 |       && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
12 |       && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
13 |       && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
14 |       && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
15 |       && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
16 |       && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
17 |       && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
18 |       && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
19 |       && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
20 |       && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
21 |       && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
22 |       && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
23 |       && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
24 |       && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
25 |       && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
26 |       && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
27 |       && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
28 |       && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
29 |       && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
30 | /* The character set is not based on ISO-646.  */
31 | #error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gperf@gnu.org>."
32 | #endif
33 | 
34 | 
35 | #define TOTAL_KEYWORDS 1000
36 | #define MIN_WORD_LENGTH 11
37 | #define MAX_WORD_LENGTH 11
38 | #define MIN_HASH_VALUE 100
39 | #define MAX_HASH_VALUE 9570
40 | /* maximum key range = 9471, duplicates = 0 */
41 | 
/*
 * gperf-generated hash for the 11-byte keys this table was built from
 * (see MIN_WORD_LENGTH / MAX_WORD_LENGTH above).
 *
 * str: key bytes; indices 0, 1, 2, 4, 5 and 9 are read unconditionally, so
 *      at least 10 bytes must be readable.
 * len: ignored; it is kept only so every hash shares the same signature.
 *
 * Returns the sum of six table lookups, one per inspected byte. Each lookup
 * adds a per-position offset to the byte value, which is why the table is
 * padded beyond 256 entries: the largest index (255 + 15) stays in bounds.
 *
 * The table is never written, so it is declared const. That keeps it in
 * read-only storage and is also required by C99 6.7.4p3, which forbids a
 * modifiable static object inside a non-static inline function.
 */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/*ARGSUSED*/
unsigned int
GperfSSNHash (const char *str, size_t len)
{
  static const unsigned short asso_values[] =
    {
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 3048,  933,
      1999,   17,   80,  680, 1005,  370,   20,   35,   85,  185,
       175,   35, 1889,    5,   15,    0,    1,  461,  847, 2123,
      3418, 3667, 4346, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571, 9571,
      9571
    };
  /* One lookup per computed key position; offsets decorrelate positions. */
  return asso_values[(unsigned char)str[9]+1]
       + asso_values[(unsigned char)str[5]+4]
       + asso_values[(unsigned char)str[4]]
       + asso_values[(unsigned char)str[2]+11]
       + asso_values[(unsigned char)str[1]+8]
       + asso_values[(unsigned char)str[0]+15];
}
86 | 
87 | 


--------------------------------------------------------------------------------
/src/keyuser/src/gperf-hashes/ints.c:
--------------------------------------------------------------------------------
 1 | /* ANSI-C code produced by gperf version 3.1 */
 2 | /* Command-line: ./bin/gperf  */
 3 | /* Computed positions: -k'1-2,18,23,51' */
 4 | 
 5 | #include "gperf-hashes.h"
 6 | 
 7 | #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
 8 |       && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
 9 |       && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
10 |       && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
11 |       && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
12 |       && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
13 |       && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
14 |       && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
15 |       && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
16 |       && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
17 |       && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
18 |       && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
19 |       && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
20 |       && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
21 |       && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
22 |       && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
23 |       && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
24 |       && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
25 |       && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
26 |       && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
27 |       && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
28 |       && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
29 |       && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
30 | /* The character set is not based on ISO-646.  */
31 | #error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gperf@gnu.org>."
32 | #endif
33 | 
34 | 
35 | #define TOTAL_KEYWORDS 1000
36 | #define MIN_WORD_LENGTH 100
37 | #define MAX_WORD_LENGTH 100
38 | #define MIN_HASH_VALUE 34
39 | #define MAX_HASH_VALUE 9933
40 | /* maximum key range = 9900, duplicates = 0 */
41 | 
/*
 * gperf-generated hash for the 100-byte keys this table was built from
 * (see MIN_WORD_LENGTH / MAX_WORD_LENGTH above).
 *
 * str: key bytes; indices 0, 1, 17, 22 and 50 are read unconditionally, so
 *      at least 51 bytes must be readable.
 * len: ignored; it is kept only so every hash shares the same signature.
 *
 * Returns the sum of five table lookups, one per inspected byte. Each lookup
 * adds a per-position offset to the byte value, which is why the table is
 * padded beyond 256 entries: the largest index (255 + 23) stays in bounds.
 *
 * The table is never written, so it is declared const. That keeps it in
 * read-only storage and is also required by C99 6.7.4p3, which forbids a
 * modifiable static object inside a non-static inline function.
 */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/*ARGSUSED*/
unsigned int
GperfINTSHash (const char *str, size_t len)
{
  static const unsigned short asso_values[] =
    {
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 1329,
       728, 1983,    1,    0,  115,   65,  400,  995,   55,  540,
       240,   25,  886, 2868,   15,  160,  435,    5,  872, 2001,
      3678,    0,   10,   40,  962, 2588, 1429,    2,   36, 3217,
      1010, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934,
      9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934, 9934
    };
  /* One lookup per computed key position; offsets decorrelate positions. */
  return asso_values[(unsigned char)str[50]+1]
       + asso_values[(unsigned char)str[22]+10]
       + asso_values[(unsigned char)str[17]+23]
       + asso_values[(unsigned char)str[1]+4]
       + asso_values[(unsigned char)str[0]+16];
}
86 | 
87 | 


--------------------------------------------------------------------------------
/src/keyuser/src/google-hashes/city.hpp:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2011 Google, Inc.
  2 | //
  3 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  4 | // of this software and associated documentation files (the "Software"), to deal
  5 | // in the Software without restriction, including without limitation the rights
  6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7 | // copies of the Software, and to permit persons to whom the Software is
  8 | // furnished to do so, subject to the following conditions:
  9 | //
 10 | // The above copyright notice and this permission notice shall be included in
 11 | // all copies or substantial portions of the Software.
 12 | //
 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 19 | // THE SOFTWARE.
 20 | //
 21 | // CityHash, by Geoff Pike and Jyrki Alakuijala
 22 | //
 23 | // http://code.google.com/p/cityhash/
 24 | //
 25 | // This file provides a few functions for hashing strings.  All of them are
 26 | // high-quality functions in the sense that they pass standard tests such
 27 | // as Austin Appleby's SMHasher.  They are also fast.
 28 | //
 29 | // For 64-bit x86 code, on short strings, we don't know of anything faster than
 30 | // CityHash64 that is of comparable quality.  We believe our nearest competitor
 31 | // is Murmur3.  For 64-bit x86 code, CityHash64 is an excellent choice for hash
 32 | // tables and most other hashing (excluding cryptography).
 33 | //
 34 | // For 64-bit x86 code, on long strings, the picture is more complicated.
 35 | // On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc.,
 36 | // CityHashCrc128 appears to be faster than all competitors of comparable
 37 | // quality.  CityHash128 is also good but not quite as fast.  We believe our
 38 | // nearest competitor is Bob Jenkins' Spooky.  We don't have great data for
 39 | // other 64-bit CPUs, but for long strings we know that Spooky is slightly
 40 | // faster than CityHash on some relatively recent AMD x86-64 CPUs, for example.
 41 | // Note that CityHashCrc128 is declared in citycrc.h.
 42 | //
 43 | // For 32-bit x86 code, we don't know of anything faster than CityHash32 that
 44 | // is of comparable quality.  We believe our nearest competitor is Murmur3A.
 45 | // (On 64-bit CPUs, it is typically faster to use the other CityHash variants.)
 46 | //
 47 | // Functions in the CityHash family are not suitable for cryptography.
 48 | //
 49 | // Please see CityHash's README file for more details on our performance
 50 | // measurements and so on.
 51 | //
 52 | // WARNING: This code has been only lightly tested on big-endian platforms!
 53 | // It is known to work well on little-endian platforms that have a small penalty
 54 | // for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
 55 | // It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
 56 | // bug reports are welcome.
 57 | //
 58 | // By the way, for some hash functions, given strings a and b, the hash
 59 | // of a+b is easily derived from the hashes of a and b.  This property
 60 | // doesn't hold for any hash functions in this file.
 61 | 
 62 | #ifndef CITY_HASH_H_
 63 | #define CITY_HASH_H_
 64 | 
 65 | #include <stdlib.h>  // for size_t.
 66 | #include <stdint.h>
 67 | #include <utility>
 68 | 
 69 | typedef uint8_t uint8;
 70 | typedef uint32_t uint32;
 71 | typedef uint64_t uint64;
 72 | typedef std::pair<uint64, uint64> uint128;
 73 | 
 74 | inline uint64 Uint128Low64(const uint128& x) { return x.first; }
 75 | inline uint64 Uint128High64(const uint128& x) { return x.second; }
 76 | 
 77 | // Hash function for a byte array.
 78 | uint64 CityHash64(const char *buf, size_t len);
 79 | 
 80 | // Hash function for a byte array.  For convenience, a 64-bit seed is also
 81 | // hashed into the result.
 82 | uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
 83 | 
 84 | // Hash function for a byte array.  For convenience, two seeds are also
 85 | // hashed into the result.
 86 | uint64 CityHash64WithSeeds(const char *buf, size_t len,
 87 |                            uint64 seed0, uint64 seed1);
 88 | 
 89 | // Hash function for a byte array.
 90 | uint128 CityHash128(const char *s, size_t len);
 91 | 
 92 | // Hash function for a byte array.  For convenience, a 128-bit seed is also
 93 | // hashed into the result.
 94 | uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
 95 | 
 96 | // Hash function for a byte array.  Most useful in 32-bit binaries.
 97 | uint32 CityHash32(const char *buf, size_t len);
 98 | 
 99 | // Hash 128 input bits down to 64 bits of output.
100 | // This is intended to be a reasonably good hash function.
101 | inline uint64 Hash128to64(const uint128& x) {
102 |   // Murmur-inspired hashing.
103 |   const uint64 kMul = 0x9ddfea08eb382d69ULL;
104 |   uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
105 |   a ^= (a >> 47);
106 |   uint64 b = (Uint128High64(x) ^ a) * kMul;
107 |   b ^= (b >> 47);
108 |   b *= kMul;
109 |   return b;
110 | }
111 | 
112 | #endif  // CITY_HASH_H_
113 | 


--------------------------------------------------------------------------------
/src/keybuilder/src/main.c:
--------------------------------------------------------------------------------
  1 | /*
  2 | * The regex generator works by first assuming the entire string is always the
  3 | * same, and adapting that assumption as it reads more inputs. For example,
  4 | * imagine the following 3 lines of input:
  5 | *
  6 | *                                 10:45:AF
  7 | *                                 FF:35:AB
  8 | *                                 07:cb:09
  9 | *
 10 | * After reading the first line, our program assumes 10:45:AF is a fixed string.
 11 | * After reading the second line, our program updates that to a regex. Every byte
 12 | * that is different from the original becomes a [] class:
 13 | *
 14 | *                      [0-Z][0-Z]:[0-9][0-9]:[A-Z][A-Z]
 15 | *
 16 | * Upon reading the third line, we will update again to:
 17 | *
 18 | *                      [0-Z][0-Z]:[0-z][0-z]:[0-Z][0-Z]
 19 | */
 20 | 
 21 | #define _GNU_SOURCE
 22 | 
 23 | #include <stdlib.h>
 24 | #include <stdio.h>
 25 | #include <errno.h>
 26 | #include <string.h>
 27 | #include <sys/types.h>
 28 | #include <math.h>
 29 | 
 30 | typedef struct Range {
 31 | 	char start;
 32 | 	char end;
 33 | 	unsigned long count[256];
 34 | } Range;
 35 | 
 36 | enum Class {
 37 | 	Class_Num = 0x1,
 38 | 	Class_Lowercase = 0x2,
 39 | 	Class_UpperCase = 0x4,
 40 | 	Class_Punct = 0x8,
 41 | };
 42 | 
 43 | static int range_class(const Range range) {
 44 | 	int class = 0;
 45 | 
 46 | 	if (range.start >= '0' && range.start <= '9')
 47 | 		class |= Class_Num;
 48 | 	else if (range.start >= 'A' && range.start <= 'Z')
 49 | 		class |= Class_UpperCase;
 50 | 	else if (range.start >= 'a' && range.start <= 'z')
 51 | 		class |= Class_Lowercase;
 52 | 	else
 53 | 		class |= Class_Punct;
 54 | 
 55 | 	if (range.end >= '0' && range.end <= '9')
 56 | 		class |= Class_Num;
 57 | 	else if (range.end >= 'A' && range.end <= 'Z')
 58 | 		class |= Class_UpperCase;
 59 | 	else if (range.end >= 'a' && range.end <= 'z')
 60 | 		class |= Class_Lowercase;
 61 | 	else
 62 | 		class |= Class_Punct;
 63 | 
 64 | 	return class;
 65 | }
 66 | 
 67 | static void print_class(const Range range) {
 68 | 	char start = 0;
 69 | 	char end = 127;
 70 | 
 71 | 	if (range.start >= '0' && range.start <= '9')
 72 | 		start = '0';
 73 | 	else if (range.start >= 'A' && range.start <= 'Z')
 74 | 		start = 'A';
 75 | 	else if (range.start >= 'a' && range.start <= 'z')
 76 | 		start = 'a';
 77 | 	else
 78 | 		start = '!';
 79 | 
 80 | 	if (range.end >= '0' && range.end <= '9')
 81 | 		end = '9';
 82 | 	else if (range.end >= 'A' && range.end <= 'Z')
 83 | 		end = 'Z';
 84 | 	else if (range.end >= 'a' && range.end <= 'z')
 85 | 		end = 'z';
 86 | 	else
 87 | 		end = '}';
 88 | 
 89 | 	printf("[%c-%c]", start, end);
 90 | }
 91 | 
 92 | static int is_special(const char ch) {
 93 | 	return ch == '\\'
 94 | 		|| ch == '['
 95 | 		|| ch == '{'
 96 | 		|| ch == ')'
 97 | 		|| ch == '('
 98 | 		|| ch == '+'
 99 | 		|| ch == '*'
100 | 		|| ch == '?'
101 | 		|| ch == '.';
102 | }
103 | 
104 | int main(int argc, const char* argv[]) {
105 | 
106 | 	if (argc > 1) {
107 | 		printf("keybuilder\n");
108 | 		printf("\nDescription: keybuilder generates a regex from a series of strings separated by newlines\n");
109 | 		printf("\nExample usage: `./keybuilder < keys.txt`\n");
110 | 		printf("\nOptions:\n");
111 | 		printf("\n    -h    Print this help\n");
112 | 		printf("\n");
113 | 		if (!(strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0)) {
114 | 			fprintf(stderr, "ERROR: unrecognized argument: %s\n", argv[1]);
115 | 			return 1;
116 | 		} else {
117 | 			return 0;
118 | 		}
119 | 	}
120 | 
121 | 	char* line = NULL;
122 | 	size_t n;
123 | 
124 | 	// begin by reading the first line
125 | 	ssize_t line_size = getline(&line, &n, stdin);
126 | 	if (line_size == -1) {
127 | 		if (errno != 0) {
128 | 			fprintf(stderr, "failed to read initial line from standard input: %s\n", strerror(errno));
129 | 		} else {
130 | 			fprintf(stderr, "must send strings in standard input!\n");
131 | 		}
132 | 		free(line);
133 | 		return 1;
134 | 	}
135 | 
136 | 	Range* ranges = malloc(line_size * sizeof(*ranges));
137 | 
138 | 	// set the initial ranges, according to the first line
139 | 	for (ssize_t i = 0; i < line_size - 1; ++i) {
140 | 		ranges[i].start = line[i];
141 | 		ranges[i].end = line[i];
142 | 	}
143 | 
144 | 
145 | 	// now, read every line, while updating the ranges. We assume all
146 | 	// lines have the same size, and exit with an error if they don't
147 | 	ssize_t in_bytes;
148 | 	ssize_t line_count = 1;
149 | 	while ((in_bytes = getline(&line, &n, stdin)) > -1) {
150 | 		if (in_bytes != line_size) {
151 | 			fprintf(stderr, "WARNING: lines have different size!\n");
152 | 		}
153 | 		line_size = line_size < in_bytes ? line_size : in_bytes;
154 | 
155 | 		for (ssize_t i = 0; i < line_size - 1; ++i) {
156 | 			ranges[i].start = line[i] < ranges[i].start ? line[i] : ranges[i].start;
157 | 			ranges[i].end = line[i] > ranges[i].end ? line[i] : ranges[i].end;
158 | 			ranges[i].count[line[i]]++;
159 | 		}
160 | 
161 | 		++line_count;
162 | 	}
163 | 	free(line);
164 | 	line = NULL;
165 | 
166 | 	// finally, group consecutive identical ranges together. For example,
167 | 	// `[0-9][0-9][0-9]` will turn into `[0-9]{3}`
168 | 	ssize_t i = 0;
169 | 	while (i < line_size - 1) {
170 | 		const Range range = ranges[i++];
171 | 		if (range.start == range.end) {
172 | 			if (is_special(range.start)) {
173 | 				putchar('\\');
174 | 			}
175 | 			putchar(range.start);
176 | 		} else {
177 | 			int repetitions = 1;
178 | 			Range other = ranges[i++];
179 | 			while (range_class(range) == range_class(other)) {
180 | 				other = ranges[i++];
181 | 				++repetitions;
182 | 			}
183 | 			--i;
184 | 
185 | 			print_class(range);
186 | 			if (repetitions > 1) {
187 | 				printf("{%d}", repetitions);
188 | 			}
189 | 		}
190 | 	}
191 | 	puts("");
192 | 
193 | 	free(ranges);
194 | 	return 0;
195 | }
196 | 


--------------------------------------------------------------------------------
/src/keyuser/src/main.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * @file main.cpp
  3 |  * @brief Main file for the benchmarking program.
  4 |  * 
  5 |  * This file includes the main function and other helper functions for the benchmarking program.
  6 |  * It parses command line arguments and runs the benchmarks accordingly.
  7 |  */
  8 | 
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>
 14 | 
 15 | #include "benchmarks.hpp"
 16 | #include "customHashes.hpp"
 17 | #include "registry.hpp"
 18 | 
 19 | std::string correctBenchUsage(){
 20 |     return "Correct Usage: ./benchmarks -i <number> -s <number> -e <number> -n <number>\n"
 21 |            "       --hashes <hash0> <hash1> ... <hashN>: list of hashes to run\n "
 22 |            "       -i or --insert: integer that represents the percentage of insertion operations\n"
 23 |            "       -s or --search: integer that represents the percentage of search operations\n"
 24 |            "       -e or --elimination: integer that represents the percentage of elimination operations\n"
 25 |            "               The sum of -i, -s, and -e should be 100.\n"
 26 |            "       -n or --num-operations: integer that represents the number of 'times to perform a hash operation on the benchmark\n"
 27 |            "       -r or --repetitions: number of times to repeat the benchmark\n"
 28 |            "       -seed: integer that represents the seed for the random number generator\n "
 29 |            "       --test-distribution: test the distribution of '--hashes' specified hash functions\n"
 30 |            "       --distribution: specify the randon distribution of the keys to be used in the benchmark\n"
 31 |            "       --hash-performance: test the execution time of the hash functions over a set of keys\n"  
 32 |            "       -v or --verbose: print the results of each operation\n"
 33 |            "       -h or --help: print this message\n"
 34 |            ;
 35 | }
 36 | 
 37 | BenchmarkParameters parseArgs(int argc, char** argv){
 38 |     BenchmarkParameters args;
 39 |     args.verbose = false;
 40 |     int i = 1;
 41 |     while(i<argc){
 42 |         if(strcmp(argv[i], "--hashes") == 0){
 43 |             i++;
 44 |             while(i<argc && argv[i][0] != '-'){
 45 |                 args.hashesToRun.push_back(argv[i]);
 46 |                 i++;
 47 |             }
 48 |         } else if(strcmp(argv[i], "-i") == 0 || 
 49 |            strcmp(argv[i], "--insert") == 0){
 50 |             args.insert = atoi(argv[i+1]);
 51 |             i+=2;
 52 |         }else if(strcmp(argv[i], "-s") == 0 || 
 53 |                  strcmp(argv[i], "--search") == 0){
 54 |             args.search = atoi(argv[i+1]);
 55 |             i+=2;
 56 |         }else if(strcmp(argv[i], "-e") == 0 || 
 57 |                  strcmp(argv[i], "--elimination") == 0){
 58 |             args.elimination = atoi(argv[i+1]);
 59 |             i+=2;
 60 |         }else if(strcmp(argv[i], "-n") == 0 || 
 61 |                  strcmp(argv[i], "--num-operations") == 0){
 62 |             args.numOperations = atoi(argv[i+1]);
 63 |             i+=2;
 64 |         }else if(strcmp(argv[i], "-r") == 0 || 
 65 |                  strcmp(argv[i], "--repetitions") == 0){
 66 |             args.repetitions = atoi(argv[i+1]);
 67 |             i+=2;
 68 |         }else if(strcmp(argv[i], "-seed") == 0){
 69 |             args.seed = atoi(argv[i+1]);
 70 |             i+=2;
 71 |         }else if(strcmp(argv[i], "-v") == 0 || 
 72 |                  strcmp(argv[i], "--verbose") == 0){
 73 |             args.verbose = true;
 74 |             i++;
 75 |         }else if(strcmp(argv[i], "--test-distribution") == 0){
 76 |             args.testDistribution = true;
 77 |             i++;
 78 |         }else if(strcmp(argv[i], "--hash-performance") == 0){
 79 |             args.hashPerformance = true;
 80 |             i++;
 81 |         }else if(strcmp(argv[i], "--distribution") == 0){
 82 |             i++;
 83 |             std::string distribution = argv[i];
 84 |             if(distribution == "normal"){
 85 |                 args.distribution = "normal";
 86 |             }else if(distribution == "uniform"){
 87 |                 args.distribution = "uniform";
 88 |             }else{
 89 |                 fprintf(stderr,"Invalid distribution: %s! Defaulting to normal.\n", distribution.c_str());
 90 |                 args.distribution = "normal";
 91 |             }
 92 |             i++;
 93 |         }else if(strcmp(argv[i], "-h") == 0 || 
 94 |                  strcmp(argv[i], "--help") == 0){
 95 |             fprintf(stderr, "%s", correctBenchUsage().c_str());
 96 |             exit(0);
 97 |         }else{
 98 |             fprintf(stderr, "Invalid argument: %s\n", argv[i]);
 99 |             fprintf(stderr, "%s", correctBenchUsage().c_str());
100 |             exit(1);
101 |         }
102 |     }
103 |     int sumArgs = args.insert + args.search + args.elimination;
104 |     if ( sumArgs != 100 ){
105 |         fprintf(stderr, "Missing insert, search, or elimination arguments.\n");
106 |         fprintf(stderr, "%s", correctBenchUsage().c_str());
107 |         exit(1);
108 |     }
109 |     return args;
110 | }
111 | 
112 | int main(int argc, char** argv){
113 | 
114 |     BenchmarkParameters args = parseArgs(argc, argv);
115 | 
116 |     // All benchmarks to run
117 |     std::vector<Benchmark*> benchmarks;
118 | 
119 |     // Register Benchmarks with a macro for beter legibility
120 |     REGISTER_ALL_BENCHMARKS
121 | 
122 |     // Load keys from standard input into memory
123 |     std::vector<std::string> keys;
124 |     std::string line;
125 |     while(std::getline(std::cin, line)){
126 |         keys.push_back(line);
127 |     }
128 | 
129 |     // Fill default hash functions to run
130 |     if(args.hashesToRun.empty()){
131 |         args.hashesToRun.push_back("STDHashBin");
132 |         args.hashesToRun.push_back("FNVHash");
133 |     }
134 |     // Delete benchmarks that are not in the list of hashes to run
135 |     std::vector<Benchmark*> filteredBenchmarks;
136 |     for(auto& bench : benchmarks){
137 |         // Check if we should run this hash based on arguments
138 |         if(std::find(args.hashesToRun.begin(),
139 |                      args.hashesToRun.end(), 
140 |                      bench->getHashName()) != args.hashesToRun.end())
141 |         {
142 |             filteredBenchmarks.push_back(bench);
143 |         } else {
144 |             delete bench;
145 |         }
146 |     }
147 | 
148 |     // Run benchmarks
149 |     if(args.hashPerformance){
150 |         testHashPerformance(filteredBenchmarks, keys, args);
151 |         freeBenchmarks(filteredBenchmarks);
152 |         return 0;
153 |     }
154 |     if(args.testDistribution){
155 |         testDistribution(filteredBenchmarks, keys);
156 |         freeBenchmarks(filteredBenchmarks);
157 |         return 0;
158 |     }
159 |     benchmarkExecutor(filteredBenchmarks, keys, args);
160 |     freeBenchmarks(filteredBenchmarks);
161 | 
162 | }
163 | 
164 | 


--------------------------------------------------------------------------------
/src/keyuser/src/customHashes.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * @file customHashes.hpp
  3 |  * @brief This file contains a collection of custom hash functions.
  4 |  */
  5 | 
  6 | #ifndef CUSTOM_HASHES_HPP
  7 | #define CUSTOM_HASHES_HPP
  8 | 
  9 | #include <cstdint>
 10 | #include <iostream>
 11 | #include <cmath>
 12 | #include <vector>
 13 | #include <numeric>
 14 | 
/**
 * @brief Hash functor named CityHash.
 *
 * Stateless: the struct has no data members, so it is used as `CityHash{}(key)`.
 * Only the call operator's declaration is given in this header; its definition
 * is provided elsewhere.
 * NOTE(review): presumably backed by Google's CityHash (google-hashes/city.hpp
 * in the source tree) - confirm against the definition.
 */
struct CityHash{
    /**
     * @brief Function call operator that calculates the hash of a key.
     *
     * @param key The fixed-length std::string key to hash.
     * @return The hash of the key.
     */
    std::size_t operator()(const std::string& key) const;
};
 27 | 
/**
 * @brief Hash functor named AbseilHash.
 *
 * Stateless; only the call operator's declaration is given in this header.
 * NOTE(review): presumably delegates to Abseil's hashing - confirm against the
 * definition.
 */
struct AbseilHash{
    /**
     * @brief Function call operator that calculates the hash of a key.
     *
     * @param key The fixed-length std::string key to hash.
     * @return The hash of the key.
     */
    std::size_t operator()(const std::string& key) const;
};
 40 | 
/**
 * @brief Hash functor named STDHashSrc.
 *
 * Stateless; only the call operator's declaration is given in this header.
 * NOTE(review): how this differs from STDHashBin (the `Src` / `Bin` suffixes)
 * is not visible here - see the definition.
 */
struct STDHashSrc{
    /**
     * @brief Function call operator that calculates the hash of a key.
     *
     * @param key The fixed-length std::string key to hash.
     * @return The hash of the key.
     */
    std::size_t operator()(const std::string& key) const;
};
 53 | 
 54 | /**
 55 |  * @brief Struct for STDHashBin.
 56 |  */
 57 | struct STDHashBin{
 58 |     /**
 59 |      * @brief Function call operator that calculates the hash of a key.
 60 |      * 
 61 |      * @param key The fixed lenght std::string key to hash.
 62 |      * @return The hash of the key.
 63 |      */
 64 |     std::size_t operator()(const std::string& key) const;
 65 | };
 66 | 
/**
 * @brief Hash functor named FNVHash.
 *
 * Stateless; only the call operator's declaration is given in this header.
 * main.cpp selects this hash by default when no --hashes option is given.
 * NOTE(review): presumably a Fowler-Noll-Vo hash - confirm against the
 * definition.
 */
struct FNVHash {
    /**
     * @brief Function call operator that calculates the hash of a key.
     *
     * @param key The fixed-length std::string key to hash.
     * @return The hash of the key.
     */
    std::size_t operator()(const std::string& key) const;
};
 79 | 
 80 | /**
 81 |  * @brief Struct for IPV4HashUnrolled.
 82 |  */
 83 | struct IPV4HashUnrolled{
 84 |     /**
 85 |      * @brief Function call operator that calculates the hash of a key.
 86 |      * 
 87 |      * @param key The fixed lenght std::string key to hash.
 88 |      * @return The hash of the key.
 89 |      */
 90 |     std::size_t operator()(const std::string& key) const;
 91 | };
 92 | 
 93 | /**
 94 |  * @brief Struct for IPV4HashMove.
 95 |  */
 96 | struct IPV4HashMove{
 97 |     /**
 98 |      * @brief Function call operator that calculates the hash of a key.
 99 |      * 
100 |      * @param key The fixed lenght std::string key to hash.
101 |      * @return The hash of the key.
102 |      */
103 |     std::size_t operator()(const std::string& key) const;
104 | };
105 | 
106 | /**
107 |  * @brief Struct for IntSimdHash.
108 |  */
109 | struct IntSimdHash{
110 |     /**
111 |      * @brief Function call operator that calculates the hash of a key.
112 |      * 
113 |      * @param key The fixed lenght std::string key to hash.
114 |      * @return The hash of the key.
115 |      */
116 |     std::size_t operator()(const std::string& key) const;
117 | };
118 | 
119 | /**
120 |  * @brief Struct for PextMurmurINTS.
121 |  */
122 | struct PextMurmurINTS{
123 |     /**
124 |      * @brief Function call operator that calculates the hash of a key.
125 |      * 
126 |      * @param key The fixed lenght std::string key to hash.
127 |      * @return The hash of the key.
128 |      */
129 |     std::size_t operator()(const std::string& key) const;
130 | };
131 | 
132 | /**
133 |  * @brief Struct for PextMurmurUrlComplex.
134 |  */
135 | struct PextMurmurUrlComplex{
136 |     /**
137 |      * @brief Function call operator that calculates the hash of a key.
138 |      * 
139 |      * @param key The fixed lenght std::string key to hash.
140 |      * @return The hash of the key.
141 |      */
142 |     std::size_t operator()(const std::string& key) const;
143 | };
144 | 
/**
 * @brief Macro that declares six hash functor structs for one key format.
 *
 * For a given NAME it declares `AesNAME`, `PextNAME`, `OffXorNAME`,
 * `NaiveNAME`, `GptNAME` and `GperfNAME`. Each struct only declares
 * `std::size_t operator()(const std::string& key) const;` - the definitions
 * are provided elsewhere.
 *
 * @param NAME The key format name appended to each of the six prefixes.
 */
#define DECLARE_SYNTH_OFFXOR_NAIVE_CHATGPT_AND_GPERF_FUNCTIONS(NAME) \
    struct Aes ## NAME { \
        std::size_t operator()(const std::string& key) const; \
    }; \
    struct Pext ## NAME { \
        std::size_t operator()(const std::string& key) const; \
    }; \
    struct OffXor ## NAME { \
        std::size_t operator()(const std::string& key) const; \
    }; \
    struct Naive ## NAME { \
        std::size_t operator()(const std::string& key) const; \
    }; \
    struct Gpt ## NAME { \
        std::size_t operator()(const std::string& key) const; \
    }; \
    struct Gperf ## NAME { \
        std::size_t operator()(const std::string& key) const; \
    };

// Declare the six functors for each of the eight key formats below
// (48 structs in total), e.g. AesUrlComplex, PextMac, GperfINTS.
DECLARE_SYNTH_OFFXOR_NAIVE_CHATGPT_AND_GPERF_FUNCTIONS(UrlComplex)
DECLARE_SYNTH_OFFXOR_NAIVE_CHATGPT_AND_GPERF_FUNCTIONS(Url)
DECLARE_SYNTH_OFFXOR_NAIVE_CHATGPT_AND_GPERF_FUNCTIONS(Mac)
DECLARE_SYNTH_OFFXOR_NAIVE_CHATGPT_AND_GPERF_FUNCTIONS(CPF)
DECLARE_SYNTH_OFFXOR_NAIVE_CHATGPT_AND_GPERF_FUNCTIONS(SSN)
DECLARE_SYNTH_OFFXOR_NAIVE_CHATGPT_AND_GPERF_FUNCTIONS(IPV4)
DECLARE_SYNTH_OFFXOR_NAIVE_CHATGPT_AND_GPERF_FUNCTIONS(IPV6)
DECLARE_SYNTH_OFFXOR_NAIVE_CHATGPT_AND_GPERF_FUNCTIONS(INTS)
182 | 
/**
 * @brief Hash functor named NaiveSimdUrlComplex.
 *
 * Stateless; only the call operator's declaration is given in this header.
 * Distinct from NaiveUrlComplex, which is generated by the declaration macro in
 * this header.
 * NOTE(review): presumably a SIMD version of the naive UrlComplex hash -
 * confirm against the definition.
 */
struct NaiveSimdUrlComplex{
    /**
     * @brief Function call operator that calculates the hash of a key.
     *
     * @param key The fixed-length std::string key to hash.
     * @return The hash of the key.
     */
    std::size_t operator()(const std::string& key) const;
};
195 | 
/**
 * @brief Hash functor named NaiveSimdUrl.
 *
 * Stateless; only the call operator's declaration is given in this header.
 * Distinct from NaiveUrl, which is generated by the declaration macro in this
 * header.
 * NOTE(review): presumably a SIMD version of the naive Url hash - confirm
 * against the definition.
 */
struct NaiveSimdUrl {
    /**
     * @brief Function call operator that calculates the hash of a key.
     *
     * @param key The fixed-length std::string key to hash.
     * @return The hash of the key.
     */
    std::size_t operator()(const std::string& key) const;
};
208 | 
/**
 * @brief Hash functor named NaiveSimdIPV6.
 *
 * Stateless; only the call operator's declaration is given in this header.
 * Distinct from NaiveIPV6, which is generated by the declaration macro in this
 * header.
 * NOTE(review): presumably a SIMD version of the naive IPV6 hash - confirm
 * against the definition.
 */
struct NaiveSimdIPV6 {
    /**
     * @brief Function call operator that calculates the hash of a key.
     *
     * @param key The fixed-length std::string key to hash.
     * @return The hash of the key.
     */
    std::size_t operator()(const std::string& key) const;
}; 
221 | 
/**
 * @brief Hash functor named NaiveSimdINTS.
 *
 * Stateless; only the call operator's declaration is given in this header.
 * Distinct from NaiveINTS, which is generated by the declaration macro in this
 * header.
 * NOTE(review): presumably a SIMD version of the naive INTS hash - confirm
 * against the definition.
 */
struct NaiveSimdINTS {
    /**
     * @brief Function call operator that calculates the hash of a key.
     *
     * @param key The fixed-length std::string key to hash.
     * @return The hash of the key.
     */
    std::size_t operator()(const std::string& key) const;
}; 
234 | 
235 | #endif
236 | 


--------------------------------------------------------------------------------
/scripts/keyuser_interpreter.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | # This script reads multiple .csv files produced by keyuser, extracts relevant
  4 | # information and calculate metrics to compare the different hash functions
  5 | 
  6 | import os
  7 | import argparse
  8 | import importlib
  9 | import re
 10 | 
 11 | # Data science boys
 12 | import numpy as np
 13 | import pandas as pd
 14 | import matplotlib.pyplot as plt
 15 | from scipy import stats
 16 | 
 17 | def distribution_test(key, value):
 18 | 
 19 |     n = len(value)
 20 |     chi_worst_case = ((n-1)**2) + (n-1)
 21 | 
 22 |     # Calculate the observed frequencies of each value
 23 |     observed_freq, _ = np.histogram(value, bins=len(np.unique(value)))
 24 | 
 25 |     # Calculate the expected frequencies for a uniform distribution
 26 |     expected_freq = np.full_like(observed_freq, len(value) / len(np.unique(value)))
 27 | 
 28 |     # Perform the Chi-Square Goodness of Fit Test
 29 |     try:
 30 |         chi2, p = stats.chisquare(observed_freq, expected_freq)
 31 |     except Exception:
 32 |         p = 0.0
 33 |         chi2 = chi_worst_case
 34 | 
 35 |     skewness = stats.skew(value)
 36 | 
 37 |     return pd.DataFrame(pd.DataFrame({"Hash Function": [key], "Skewness": [skewness], "Chi-Test": [chi2], "Uniform?": [p > 0.05]}))
 38 |     
 39 | def handle_distribution_analysis(args):
 40 |     try:
 41 |         abs_path = os.path.abspath(args.distribution)
 42 |         spec = importlib.util.spec_from_file_location(args.distribution.replace(".py",""),abs_path)
 43 |         arrays = importlib.util.module_from_spec(spec)
 44 |         spec.loader.exec_module(arrays)
 45 |     except ImportError:
 46 |         print(args.distribution, "is an invalid import! Please make sure that the module exists and is in the same directory as this script.")
 47 |         print("If it does not exist, you can obtain it by running keyuser with --test-distribution or bench-runner with --histogram ", args.distribution)
 48 |         exit(1)
 49 | 
 50 |     regex_name = os.path.basename(args.distribution).replace(".py", "")
 51 | 
 52 |     if args.hash_functions is None:
 53 |         args.hash_functions = []
 54 |         for key, value in arrays.distributions.items():
 55 |             args.hash_functions.append(key)
 56 | 
 57 |     result = None
 58 |     if args.plot_graph:
 59 |         file_destination = args.output_destination + regex_name + "_histogram.svg"
 60 |         _, ax = plt.subplots(figsize=(10, 5))
 61 |     
 62 |     for key, value in arrays.distributions.items():
 63 |         key = key.replace("array_", "")
 64 | 
 65 |         if args.plot_graph:
 66 |             ax.hist(value, label=key, alpha=0.5)
 67 | 
 68 |         if result is None:
 69 |             result = distribution_test(key, value)
 70 |         else:
 71 |             result = pd.concat([result, distribution_test(key, value)], ignore_index=True)
 72 | 
 73 |     if args.plot_graph:
 74 |         ax.legend(loc='upper left', bbox_to_anchor=(1, 1))
 75 |         plt.tight_layout()
 76 |         plt.savefig(file_destination)
 77 |         print("Histograms saved in: ", file_destination)
 78 | 
 79 |     print("Below DataFrame from distribution file: ", args.distribution)
 80 |     print(result)
 81 |     output_path = args.output_destination + regex_name + "_chitest.csv"
 82 |     print("See all results in: ", output_path)
 83 |     result.to_csv(output_path, index=False)
 84 | 
 85 | ########################
 86 | # Performance Analysis #
 87 | ########################
 88 | 
 89 | def mannwhitneyu_from_dataframe(args, regex_name, df, column_to_group, column_to_compare):
 90 |     grouped = df.groupby(column_to_group)
 91 | 
 92 |     # Extract the groups from the DataFrame
 93 |     groups = {}
 94 |     groups = [group for group in grouped.groups]
 95 | 
 96 |     # Initialize an empty DataFrame
 97 |     result = pd.DataFrame()
 98 | 
 99 |     for hashFunc1 in groups:
100 |         for hashFunc2 in groups:
101 |             if hashFunc1 == hashFunc2:
102 |                 continue
103 | 
104 |             # Calculate the Mann-Whitney U test between the first two groups
105 |             _, p = stats.mannwhitneyu(grouped.get_group(hashFunc1)[column_to_compare], grouped.get_group(hashFunc2)[column_to_compare])
106 | 
107 |             # interpret
108 |             alpha = 0.05
109 | 
110 |             # Append a new row to the DataFrame
111 |             result = pd.concat([result, pd.DataFrame({"Hash Function 1": [hashFunc1], "Hash Function 2": [hashFunc2], "p-value": [p], "Same Distribution?": [p > alpha]})], ignore_index=True)
112 | 
113 |     print("Below DataFrame from Regex: ", regex_name)
114 |     print(result)
115 |     output_path = args.output_destination + regex_name + "_" + column_to_compare + "_mannwhitneyu.csv"
116 |     print("See all results in: ", output_path)
117 |     result.to_csv(output_path, index=False)
118 | 
def box_plot_dataframe(args, df, regex_name):
    """Save one execution-time box plot per benchmark configuration.

    A configuration is one combination of execution mode, number of
    operations, number of keys and the insertion/search/elimination
    percentages. Each plot compares the hash functions and is saved as
    ``<output_destination><regex_name><group>.svg``.

    Args:
        args: Parsed command line options; only ``output_destination`` is read.
        df: DataFrame holding the performance measurements.
        regex_name: Prefix for the generated file names.
    """
    grouped = df.groupby(['Execution Mode', 'Num Operations', 'Num Keys', 'Insertions (%)', 'Searches (%)', 'Eliminatons(%)'])

    for group in grouped.groups:
        measurements = grouped.get_group(group)[['Execution Time (s)', 'Hash Function']]

        ax = measurements.boxplot(by='Hash Function', rot=45, showmeans=True)
        ax.set_ylabel('Execution Time (s)')
        ax.set_xlabel('')
        ax.set_title(group)
        plt.suptitle('')
        plt.tight_layout()
        plt.savefig(args.output_destination+regex_name+str(group)+'.svg')
        # Each boxplot call opens a new figure; close it once saved so the
        # figures do not pile up in memory across configurations.
        plt.close(ax.get_figure())

    print("Boxplots saved in: ", args.output_destination)
138 | 
def performance_from_dataframe(args, df, regex_name):
    """Summarise each hash function by geometric means over all configurations.

    For every (configuration, hash function) group the mean execution time and
    the mean collision count are taken. Those means are then combined per hash
    function into "GeoTime" and "GeoCollision". Zero collision means are left
    out of the product, while the root is still taken over the number of
    configurations. The table is printed and written to
    ``<output_destination><regex_name>_geomean.csv``.

    Args:
        args: Parsed command line options; only ``output_destination`` is read.
        df: DataFrame holding the performance measurements.
        regex_name: Label used in the printed header and in the file name.
    """
    group_columns = ['Execution Mode', 'Num Operations', 'Num Keys', 'Insertions (%)', 'Searches (%)', 'Eliminatons(%)', 'Hash Function']
    grouped = df.groupby(group_columns)

    # hash function name -> [(mean time, mean collisions), ...], one pair per configuration
    means_by_function = {}
    for group_key in grouped.groups:
        measurements = grouped.get_group(group_key)[['Execution Time (s)', 'Collision Count']]
        mean_pair = (measurements['Execution Time (s)'].mean(), measurements['Collision Count'].mean())
        # The hash function name is the last grouping column.
        means_by_function.setdefault(group_key[-1], []).append(mean_pair)

    # Geometric mean of the per-configuration means
    result = pd.DataFrame()
    for func_name, mean_pairs in means_by_function.items():
        time_product = 1.0
        collision_product = 1.0
        for mean_time, mean_collisions in mean_pairs:
            time_product *= mean_time
            if mean_collisions != 0:
                collision_product *= mean_collisions
        geo_time = time_product ** (1/len(mean_pairs))
        geo_collision = collision_product ** (1/len(mean_pairs))
        row = pd.DataFrame({"Func Name": [func_name], "GeoTime": [geo_time], "GeoCollision": [geo_collision]})
        result = pd.concat([result, row], ignore_index=True)

    print("Below DataFrame from Regex: ", regex_name)
    print(result)
    output_path = args.output_destination + regex_name + "_geomean.csv"
    print("See all results in: ", output_path)
    result.to_csv(output_path, index=False)
176 | 
177 |     # Normalize "Chi-Test" column using the "STDHashSrc" as the reference
178 |     # result["GeoTime"] = 100.0 - (result["GeoTime"] / result[result["Func Name"] == "STDHashSrc"]["GeoTime"].values[0]) * 100.0
179 |     # print("Normalized GeoTime %")
180 |     # print(result)
181 | 
def handle_performance_analysis(args):
    """Run the performance analysis over the CSV files in ``args.performance``.

    All files are merged into one DataFrame. The Mann-Whitney comparison is
    run for the execution time and for the collision count, box plots are
    drawn when ``args.plot_graph`` is set, and the geometric-mean summary is
    produced last. Output files are named after the first CSV file.

    Args:
        args: Parsed command line options.
    """
    csv_files = args.performance

    # Load every CSV file, then stack them into a single DataFrame
    frames = []
    for csv_path in csv_files:
        frames.append(pd.read_csv(csv_path))

    regex_name = os.path.basename(csv_files[0]).replace(".csv", "")

    df = pd.concat(frames, ignore_index=True)

    # Mann-Whitney U test for each measured metric
    for metric in ('Execution Time (s)', 'Collision Count'):
        mannwhitneyu_from_dataframe(args, regex_name, df, 'Hash Function', metric)

    if args.plot_graph:
        box_plot_dataframe(args, df, regex_name)

    performance_from_dataframe(args, df, regex_name)
201 | 
def main():
    """Parse the command line and dispatch to the requested analysis.

    ``-p`` runs the performance analysis and takes precedence over ``-d``,
    which runs the distribution analysis. When neither is given the help text
    is printed.
    """
    parser = argparse.ArgumentParser(description="Keyuser Interpreter")
    parser.add_argument("-d", "--distribution", type=str, default="", help="Name of the distribution file to interpret. Exclusive with -p option.")
    parser.add_argument("-p", "--performance", nargs='*', type=str, default="", help="Name of the csv performance files to interpret. Exclusive with -d option.")
    parser.add_argument("-pg", "--plot-graph", action='store_true', help="Option to plot the results in graphs.")
    parser.add_argument("-od", "--output-destination", type=str, default="results/", help="Output path to output graphs. Default is 'results/'.")
    parser.add_argument("-fp", "--full-print", action='store_true', help="Print the entire dataframe.")
    parser.add_argument("-hf", "--hash-functions", nargs='*', type=str, help="Name of the hash functions to analyze.")

    args = parser.parse_args()

    if args.full_print:
        # Lift every pandas display limit so whole tables are printed.
        pd.set_option('display.max_rows', None)
        pd.set_option('display.max_columns', None)
        pd.set_option('display.width', None)
        pd.set_option('display.max_colwidth', None)

    if args.performance:
        handle_performance_analysis(args)
    elif args.distribution:
        handle_distribution_analysis(args)
    else:
        # Nothing to analyse: show the usage instead of exiting silently.
        parser.print_help()

# Run the command line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
225 | 


--------------------------------------------------------------------------------
/src/sepe-runner/Cargo.lock:
--------------------------------------------------------------------------------
  1 | # This file is automatically @generated by Cargo.
  2 | # It is not intended for manual editing.
  3 | version = 3
  4 | 
  5 | [[package]]
  6 | name = "anstream"
  7 | version = "0.6.7"
  8 | source = "registry+https://github.com/rust-lang/crates.io-index"
  9 | checksum = "4cd2405b3ac1faab2990b74d728624cd9fd115651fcecc7c2d8daf01376275ba"
 10 | dependencies = [
 11 |  "anstyle",
 12 |  "anstyle-parse",
 13 |  "anstyle-query",
 14 |  "anstyle-wincon",
 15 |  "colorchoice",
 16 |  "utf8parse",
 17 | ]
 18 | 
 19 | [[package]]
 20 | name = "anstyle"
 21 | version = "1.0.4"
 22 | source = "registry+https://github.com/rust-lang/crates.io-index"
 23 | checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87"
 24 | 
 25 | [[package]]
 26 | name = "anstyle-parse"
 27 | version = "0.2.3"
 28 | source = "registry+https://github.com/rust-lang/crates.io-index"
 29 | checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c"
 30 | dependencies = [
 31 |  "utf8parse",
 32 | ]
 33 | 
 34 | [[package]]
 35 | name = "anstyle-query"
 36 | version = "1.0.2"
 37 | source = "registry+https://github.com/rust-lang/crates.io-index"
 38 | checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648"
 39 | dependencies = [
 40 |  "windows-sys",
 41 | ]
 42 | 
 43 | [[package]]
 44 | name = "anstyle-wincon"
 45 | version = "3.0.2"
 46 | source = "registry+https://github.com/rust-lang/crates.io-index"
 47 | checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7"
 48 | dependencies = [
 49 |  "anstyle",
 50 |  "windows-sys",
 51 | ]
 52 | 
 53 | [[package]]
 54 | name = "clap"
 55 | version = "4.4.17"
 56 | source = "registry+https://github.com/rust-lang/crates.io-index"
 57 | checksum = "80932e03c33999b9235edb8655bc9df3204adc9887c2f95b50cb1deb9fd54253"
 58 | dependencies = [
 59 |  "clap_builder",
 60 |  "clap_derive",
 61 | ]
 62 | 
 63 | [[package]]
 64 | name = "clap_builder"
 65 | version = "4.4.17"
 66 | source = "registry+https://github.com/rust-lang/crates.io-index"
 67 | checksum = "d6c0db58c659eef1c73e444d298c27322a1b52f6927d2ad470c0c0f96fa7b8fa"
 68 | dependencies = [
 69 |  "anstream",
 70 |  "anstyle",
 71 |  "clap_lex",
 72 |  "strsim",
 73 | ]
 74 | 
 75 | [[package]]
 76 | name = "clap_derive"
 77 | version = "4.4.7"
 78 | source = "registry+https://github.com/rust-lang/crates.io-index"
 79 | checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442"
 80 | dependencies = [
 81 |  "heck",
 82 |  "proc-macro2",
 83 |  "quote",
 84 |  "syn",
 85 | ]
 86 | 
 87 | [[package]]
 88 | name = "clap_lex"
 89 | version = "0.6.0"
 90 | source = "registry+https://github.com/rust-lang/crates.io-index"
 91 | checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"
 92 | 
 93 | [[package]]
 94 | name = "colorchoice"
 95 | version = "1.0.0"
 96 | source = "registry+https://github.com/rust-lang/crates.io-index"
 97 | checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
 98 | 
 99 | [[package]]
100 | name = "equivalent"
101 | version = "1.0.1"
102 | source = "registry+https://github.com/rust-lang/crates.io-index"
103 | checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
104 | 
105 | [[package]]
106 | name = "hashbrown"
107 | version = "0.14.3"
108 | source = "registry+https://github.com/rust-lang/crates.io-index"
109 | checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
110 | 
111 | [[package]]
112 | name = "heck"
113 | version = "0.4.1"
114 | source = "registry+https://github.com/rust-lang/crates.io-index"
115 | checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
116 | 
117 | [[package]]
118 | name = "indexmap"
119 | version = "2.1.0"
120 | source = "registry+https://github.com/rust-lang/crates.io-index"
121 | checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f"
122 | dependencies = [
123 |  "equivalent",
124 |  "hashbrown",
125 | ]
126 | 
127 | [[package]]
128 | name = "memchr"
129 | version = "2.7.1"
130 | source = "registry+https://github.com/rust-lang/crates.io-index"
131 | checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
132 | 
133 | [[package]]
134 | name = "proc-macro2"
135 | version = "1.0.76"
136 | source = "registry+https://github.com/rust-lang/crates.io-index"
137 | checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c"
138 | dependencies = [
139 |  "unicode-ident",
140 | ]
141 | 
142 | [[package]]
143 | name = "quote"
144 | version = "1.0.35"
145 | source = "registry+https://github.com/rust-lang/crates.io-index"
146 | checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
147 | dependencies = [
148 |  "proc-macro2",
149 | ]
150 | 
151 | [[package]]
152 | name = "sepe-runner"
153 | version = "0.1.0"
154 | dependencies = [
155 |  "clap",
156 |  "toml",
157 | ]
158 | 
159 | [[package]]
160 | name = "serde"
161 | version = "1.0.195"
162 | source = "registry+https://github.com/rust-lang/crates.io-index"
163 | checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02"
164 | dependencies = [
165 |  "serde_derive",
166 | ]
167 | 
168 | [[package]]
169 | name = "serde_derive"
170 | version = "1.0.195"
171 | source = "registry+https://github.com/rust-lang/crates.io-index"
172 | checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c"
173 | dependencies = [
174 |  "proc-macro2",
175 |  "quote",
176 |  "syn",
177 | ]
178 | 
179 | [[package]]
180 | name = "serde_spanned"
181 | version = "0.6.5"
182 | source = "registry+https://github.com/rust-lang/crates.io-index"
183 | checksum = "eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1"
184 | dependencies = [
185 |  "serde",
186 | ]
187 | 
188 | [[package]]
189 | name = "strsim"
190 | version = "0.10.0"
191 | source = "registry+https://github.com/rust-lang/crates.io-index"
192 | checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
193 | 
194 | [[package]]
195 | name = "syn"
196 | version = "2.0.48"
197 | source = "registry+https://github.com/rust-lang/crates.io-index"
198 | checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f"
199 | dependencies = [
200 |  "proc-macro2",
201 |  "quote",
202 |  "unicode-ident",
203 | ]
204 | 
205 | [[package]]
206 | name = "toml"
207 | version = "0.8.8"
208 | source = "registry+https://github.com/rust-lang/crates.io-index"
209 | checksum = "a1a195ec8c9da26928f773888e0742ca3ca1040c6cd859c919c9f59c1954ab35"
210 | dependencies = [
211 |  "serde",
212 |  "serde_spanned",
213 |  "toml_datetime",
214 |  "toml_edit",
215 | ]
216 | 
217 | [[package]]
218 | name = "toml_datetime"
219 | version = "0.6.5"
220 | source = "registry+https://github.com/rust-lang/crates.io-index"
221 | checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1"
222 | dependencies = [
223 |  "serde",
224 | ]
225 | 
226 | [[package]]
227 | name = "toml_edit"
228 | version = "0.21.0"
229 | source = "registry+https://github.com/rust-lang/crates.io-index"
230 | checksum = "d34d383cd00a163b4a5b85053df514d45bc330f6de7737edfe0a93311d1eaa03"
231 | dependencies = [
232 |  "indexmap",
233 |  "serde",
234 |  "serde_spanned",
235 |  "toml_datetime",
236 |  "winnow",
237 | ]
238 | 
239 | [[package]]
240 | name = "unicode-ident"
241 | version = "1.0.12"
242 | source = "registry+https://github.com/rust-lang/crates.io-index"
243 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
244 | 
245 | [[package]]
246 | name = "utf8parse"
247 | version = "0.2.1"
248 | source = "registry+https://github.com/rust-lang/crates.io-index"
249 | checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
250 | 
251 | [[package]]
252 | name = "windows-sys"
253 | version = "0.52.0"
254 | source = "registry+https://github.com/rust-lang/crates.io-index"
255 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
256 | dependencies = [
257 |  "windows-targets",
258 | ]
259 | 
260 | [[package]]
261 | name = "windows-targets"
262 | version = "0.52.0"
263 | source = "registry+https://github.com/rust-lang/crates.io-index"
264 | checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd"
265 | dependencies = [
266 |  "windows_aarch64_gnullvm",
267 |  "windows_aarch64_msvc",
268 |  "windows_i686_gnu",
269 |  "windows_i686_msvc",
270 |  "windows_x86_64_gnu",
271 |  "windows_x86_64_gnullvm",
272 |  "windows_x86_64_msvc",
273 | ]
274 | 
275 | [[package]]
276 | name = "windows_aarch64_gnullvm"
277 | version = "0.52.0"
278 | source = "registry+https://github.com/rust-lang/crates.io-index"
279 | checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea"
280 | 
281 | [[package]]
282 | name = "windows_aarch64_msvc"
283 | version = "0.52.0"
284 | source = "registry+https://github.com/rust-lang/crates.io-index"
285 | checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef"
286 | 
287 | [[package]]
288 | name = "windows_i686_gnu"
289 | version = "0.52.0"
290 | source = "registry+https://github.com/rust-lang/crates.io-index"
291 | checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313"
292 | 
293 | [[package]]
294 | name = "windows_i686_msvc"
295 | version = "0.52.0"
296 | source = "registry+https://github.com/rust-lang/crates.io-index"
297 | checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a"
298 | 
299 | [[package]]
300 | name = "windows_x86_64_gnu"
301 | version = "0.52.0"
302 | source = "registry+https://github.com/rust-lang/crates.io-index"
303 | checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd"
304 | 
305 | [[package]]
306 | name = "windows_x86_64_gnullvm"
307 | version = "0.52.0"
308 | source = "registry+https://github.com/rust-lang/crates.io-index"
309 | checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e"
310 | 
311 | [[package]]
312 | name = "windows_x86_64_msvc"
313 | version = "0.52.0"
314 | source = "registry+https://github.com/rust-lang/crates.io-index"
315 | checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"
316 | 
317 | [[package]]
318 | name = "winnow"
319 | version = "0.5.34"
320 | source = "registry+https://github.com/rust-lang/crates.io-index"
321 | checksum = "b7cf47b659b318dccbd69cc4797a39ae128f533dce7902a1096044d1967b9c16"
322 | dependencies = [
323 |  "memchr",
324 | ]
325 | 


--------------------------------------------------------------------------------
/src/keygen/Cargo.lock:
--------------------------------------------------------------------------------
  1 | # This file is automatically @generated by Cargo.
  2 | # It is not intended for manual editing.
  3 | version = 3
  4 | 
  5 | [[package]]
  6 | name = "anstream"
  7 | version = "0.6.11"
  8 | source = "registry+https://github.com/rust-lang/crates.io-index"
  9 | checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5"
 10 | dependencies = [
 11 |  "anstyle",
 12 |  "anstyle-parse",
 13 |  "anstyle-query",
 14 |  "anstyle-wincon",
 15 |  "colorchoice",
 16 |  "utf8parse",
 17 | ]
 18 | 
 19 | [[package]]
 20 | name = "anstyle"
 21 | version = "1.0.6"
 22 | source = "registry+https://github.com/rust-lang/crates.io-index"
 23 | checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc"
 24 | 
 25 | [[package]]
 26 | name = "anstyle-parse"
 27 | version = "0.2.3"
 28 | source = "registry+https://github.com/rust-lang/crates.io-index"
 29 | checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c"
 30 | dependencies = [
 31 |  "utf8parse",
 32 | ]
 33 | 
 34 | [[package]]
 35 | name = "anstyle-query"
 36 | version = "1.0.2"
 37 | source = "registry+https://github.com/rust-lang/crates.io-index"
 38 | checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648"
 39 | dependencies = [
 40 |  "windows-sys",
 41 | ]
 42 | 
 43 | [[package]]
 44 | name = "anstyle-wincon"
 45 | version = "3.0.2"
 46 | source = "registry+https://github.com/rust-lang/crates.io-index"
 47 | checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7"
 48 | dependencies = [
 49 |  "anstyle",
 50 |  "windows-sys",
 51 | ]
 52 | 
 53 | [[package]]
 54 | name = "autocfg"
 55 | version = "1.1.0"
 56 | source = "registry+https://github.com/rust-lang/crates.io-index"
 57 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
 58 | 
 59 | [[package]]
 60 | name = "cfg-if"
 61 | version = "1.0.0"
 62 | source = "registry+https://github.com/rust-lang/crates.io-index"
 63 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 64 | 
 65 | [[package]]
 66 | name = "clap"
 67 | version = "4.4.18"
 68 | source = "registry+https://github.com/rust-lang/crates.io-index"
 69 | checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c"
 70 | dependencies = [
 71 |  "clap_builder",
 72 |  "clap_derive",
 73 | ]
 74 | 
 75 | [[package]]
 76 | name = "clap_builder"
 77 | version = "4.4.18"
 78 | source = "registry+https://github.com/rust-lang/crates.io-index"
 79 | checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7"
 80 | dependencies = [
 81 |  "anstream",
 82 |  "anstyle",
 83 |  "clap_lex",
 84 |  "strsim",
 85 | ]
 86 | 
 87 | [[package]]
 88 | name = "clap_derive"
 89 | version = "4.4.7"
 90 | source = "registry+https://github.com/rust-lang/crates.io-index"
 91 | checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442"
 92 | dependencies = [
 93 |  "heck",
 94 |  "proc-macro2",
 95 |  "quote",
 96 |  "syn",
 97 | ]
 98 | 
 99 | [[package]]
100 | name = "clap_lex"
101 | version = "0.6.0"
102 | source = "registry+https://github.com/rust-lang/crates.io-index"
103 | checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"
104 | 
105 | [[package]]
106 | name = "colorchoice"
107 | version = "1.0.0"
108 | source = "registry+https://github.com/rust-lang/crates.io-index"
109 | checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
110 | 
111 | [[package]]
112 | name = "getrandom"
113 | version = "0.2.12"
114 | source = "registry+https://github.com/rust-lang/crates.io-index"
115 | checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5"
116 | dependencies = [
117 |  "cfg-if",
118 |  "libc",
119 |  "wasi",
120 | ]
121 | 
122 | [[package]]
123 | name = "heck"
124 | version = "0.4.1"
125 | source = "registry+https://github.com/rust-lang/crates.io-index"
126 | checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
127 | 
128 | [[package]]
129 | name = "keygen"
130 | version = "0.1.0"
131 | dependencies = [
132 |  "clap",
133 |  "rand",
134 |  "rand_distr",
135 | ]
136 | 
137 | [[package]]
138 | name = "libc"
139 | version = "0.2.153"
140 | source = "registry+https://github.com/rust-lang/crates.io-index"
141 | checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
142 | 
143 | [[package]]
144 | name = "libm"
145 | version = "0.2.8"
146 | source = "registry+https://github.com/rust-lang/crates.io-index"
147 | checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
148 | 
149 | [[package]]
150 | name = "num-traits"
151 | version = "0.2.17"
152 | source = "registry+https://github.com/rust-lang/crates.io-index"
153 | checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c"
154 | dependencies = [
155 |  "autocfg",
156 |  "libm",
157 | ]
158 | 
159 | [[package]]
160 | name = "ppv-lite86"
161 | version = "0.2.17"
162 | source = "registry+https://github.com/rust-lang/crates.io-index"
163 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
164 | 
165 | [[package]]
166 | name = "proc-macro2"
167 | version = "1.0.78"
168 | source = "registry+https://github.com/rust-lang/crates.io-index"
169 | checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae"
170 | dependencies = [
171 |  "unicode-ident",
172 | ]
173 | 
174 | [[package]]
175 | name = "quote"
176 | version = "1.0.35"
177 | source = "registry+https://github.com/rust-lang/crates.io-index"
178 | checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
179 | dependencies = [
180 |  "proc-macro2",
181 | ]
182 | 
183 | [[package]]
184 | name = "rand"
185 | version = "0.8.5"
186 | source = "registry+https://github.com/rust-lang/crates.io-index"
187 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
188 | dependencies = [
189 |  "libc",
190 |  "rand_chacha",
191 |  "rand_core",
192 | ]
193 | 
194 | [[package]]
195 | name = "rand_chacha"
196 | version = "0.3.1"
197 | source = "registry+https://github.com/rust-lang/crates.io-index"
198 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
199 | dependencies = [
200 |  "ppv-lite86",
201 |  "rand_core",
202 | ]
203 | 
204 | [[package]]
205 | name = "rand_core"
206 | version = "0.6.4"
207 | source = "registry+https://github.com/rust-lang/crates.io-index"
208 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
209 | dependencies = [
210 |  "getrandom",
211 | ]
212 | 
213 | [[package]]
214 | name = "rand_distr"
215 | version = "0.4.3"
216 | source = "registry+https://github.com/rust-lang/crates.io-index"
217 | checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31"
218 | dependencies = [
219 |  "num-traits",
220 |  "rand",
221 | ]
222 | 
223 | [[package]]
224 | name = "strsim"
225 | version = "0.10.0"
226 | source = "registry+https://github.com/rust-lang/crates.io-index"
227 | checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
228 | 
229 | [[package]]
230 | name = "syn"
231 | version = "2.0.48"
232 | source = "registry+https://github.com/rust-lang/crates.io-index"
233 | checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f"
234 | dependencies = [
235 |  "proc-macro2",
236 |  "quote",
237 |  "unicode-ident",
238 | ]
239 | 
240 | [[package]]
241 | name = "unicode-ident"
242 | version = "1.0.12"
243 | source = "registry+https://github.com/rust-lang/crates.io-index"
244 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
245 | 
246 | [[package]]
247 | name = "utf8parse"
248 | version = "0.2.1"
249 | source = "registry+https://github.com/rust-lang/crates.io-index"
250 | checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
251 | 
252 | [[package]]
253 | name = "wasi"
254 | version = "0.11.0+wasi-snapshot-preview1"
255 | source = "registry+https://github.com/rust-lang/crates.io-index"
256 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
257 | 
258 | [[package]]
259 | name = "windows-sys"
260 | version = "0.52.0"
261 | source = "registry+https://github.com/rust-lang/crates.io-index"
262 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
263 | dependencies = [
264 |  "windows-targets",
265 | ]
266 | 
267 | [[package]]
268 | name = "windows-targets"
269 | version = "0.52.0"
270 | source = "registry+https://github.com/rust-lang/crates.io-index"
271 | checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd"
272 | dependencies = [
273 |  "windows_aarch64_gnullvm",
274 |  "windows_aarch64_msvc",
275 |  "windows_i686_gnu",
276 |  "windows_i686_msvc",
277 |  "windows_x86_64_gnu",
278 |  "windows_x86_64_gnullvm",
279 |  "windows_x86_64_msvc",
280 | ]
281 | 
282 | [[package]]
283 | name = "windows_aarch64_gnullvm"
284 | version = "0.52.0"
285 | source = "registry+https://github.com/rust-lang/crates.io-index"
286 | checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea"
287 | 
288 | [[package]]
289 | name = "windows_aarch64_msvc"
290 | version = "0.52.0"
291 | source = "registry+https://github.com/rust-lang/crates.io-index"
292 | checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef"
293 | 
294 | [[package]]
295 | name = "windows_i686_gnu"
296 | version = "0.52.0"
297 | source = "registry+https://github.com/rust-lang/crates.io-index"
298 | checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313"
299 | 
300 | [[package]]
301 | name = "windows_i686_msvc"
302 | version = "0.52.0"
303 | source = "registry+https://github.com/rust-lang/crates.io-index"
304 | checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a"
305 | 
306 | [[package]]
307 | name = "windows_x86_64_gnu"
308 | version = "0.52.0"
309 | source = "registry+https://github.com/rust-lang/crates.io-index"
310 | checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd"
311 | 
312 | [[package]]
313 | name = "windows_x86_64_gnullvm"
314 | version = "0.52.0"
315 | source = "registry+https://github.com/rust-lang/crates.io-index"
316 | checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e"
317 | 
318 | [[package]]
319 | name = "windows_x86_64_msvc"
320 | version = "0.52.0"
321 | source = "registry+https://github.com/rust-lang/crates.io-index"
322 | checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"
323 | 


--------------------------------------------------------------------------------
/src/keygen/src/main.rs:
--------------------------------------------------------------------------------
  1 | use std::{
  2 |     io::{stdout, BufWriter, Write},
  3 |     num::NonZeroU32,
  4 | };
  5 | 
  6 | use clap::{Parser, ValueEnum};
  7 | use rand::{Rng, SeedableRng};
  8 | use rand_distr::{Distribution as Dist, Normal, Uniform};
  9 | 
 10 | /// A character list such as `[0-9]`, or its complement such as `[^0-9]`
 11 | #[derive(Debug)]
 12 | struct List {
 13 |     /// Every character named by the list, with ranges already expanded
 14 |     inner: String,
 15 |     /// `true` for a list with a caret (`[^0-9]`): generate any character BUT those in `inner`
 16 |     complement: bool,
 17 | }
 18 | 
 19 | impl List {
 20 |     /// Returns a random character allowed by the list
 21 |     ///
 22 |     /// `d` is sampled through `r` and wrapped into `[0, 1)` to choose the character.
 23 |     /// Panics if a non-complement list holds no characters.
 24 |     pub fn generate<R: Rng, D: Dist<f64>>(&self, r: &mut R, d: &D) -> char {
 25 |         if !self.complement {
 26 |             // Count characters rather than bytes so the index agrees with `chars().nth()`
 27 |             let n = self.inner.chars().count();
 28 |             // Scale by `n` (not `n - 1`) so the last character can be chosen as well; the
 29 |             // `min` guards against `rem_euclid` rounding up to exactly 1.0
 30 |             let idx = ((d.sample(r).rem_euclid(1.0) * n as f64) as usize).min(n - 1);
 31 |             self.inner.chars().nth(idx).unwrap()
 32 |         } else {
 33 |             // Draw from the caller's generator `r` so that a fixed seed gives a fixed output
 34 |             let mut ch: char = r.gen();
 35 |             while self.inner.contains(ch) {
 36 |                 // wrapping_add: no overflow panic in debug builds when the byte is 255
 37 |                 ch = (ch as u8).wrapping_add(1) as char;
 38 |             }
 39 |             ch
 40 |         }
 41 |     }
 42 | 
 43 |     /// Returns the character selected by the counter `i` and updates `i` for the next position
 44 |     ///
 45 |     /// For a regular list the character is number `i % n` of the `n` listed characters and
 46 |     /// `i` becomes `i / n`. For a complement the character is derived from `i + 32`, skipping
 47 |     /// excluded characters, and `i` is decremented (saturating at 0).
 48 |     pub fn generate_inc(&self, i: &mut u64) -> char {
 49 |         if !self.complement {
 50 |             let n = self.inner.chars().count() as u64;
 51 |             // `i % n` picks the same character as `cycle().nth(i)` without walking `i` items
 52 |             let ch = self.inner.chars().nth((*i % n) as usize).unwrap();
 53 |             // Integer division already yields 0 when `i < n`
 54 |             *i /= n;
 55 |             ch
 56 |         } else {
 57 |             let mut ch =
 58 |                 char::from_u32(*i as u32 + 32).expect("failed to generate incremental '.'");
 59 |             while self.inner.contains(ch) {
 60 |                 ch = (ch as u8 + 1) as char;
 61 |             }
 62 |             *i = (*i).saturating_sub(1);
 63 |             ch
 64 |         }
 65 |     }
 66 | }
 54 | 
 55 | /// The kinds of symbol a parsed regex element can hold
 56 | #[derive(Debug)]
 57 | enum RegexSymbol {
 58 |     /// A single character that is emitted exactly as written
 59 |     Literal(char),
 60 |     /// A parenthesized group, e.g. `(inner_regex)`, holding the elements parsed inside it
 61 |     Group(Vec<Regex>),
 62 |     /// A bracketed character list, e.g. `[0-9]`
 63 |     List(List),
 64 | }
 65 | 
 66 | /// One regex element: a symbol together with an optional repetition count
 67 | ///
 68 | /// The only modifier understood by this program is the exact repetition `{n}`;
 69 | /// the operators `+`, `*` and `?` are not accepted
 70 | #[derive(Debug)]
 71 | struct Regex {
 72 |     symbol: RegexSymbol,
 73 |     repetitions: Option<NonZeroU32>,
 74 | }
 75 | 
 76 | impl Regex {
 77 |     /// Wraps `symbol` in a Regex that carries no repetition modifier
 78 |     pub fn new(symbol: RegexSymbol) -> Self {
 79 |         let repetitions = None;
 80 |         Self { symbol, repetitions }
 81 |     }
 82 | 
 83 |     /// Produces a random string for this element, emitting the symbol once per repetition
 84 |     pub fn generate<R: Rng, D: Dist<f64>>(&self, r: &mut R, d: &D) -> String {
 85 |         // An element without `{n}` is emitted exactly once
 86 |         let count = self.repetitions.map_or(1, NonZeroU32::get);
 87 |         let mut out = String::with_capacity(count as usize);
 88 |         for _ in 0..count {
 89 |             match self.symbol {
 90 |                 RegexSymbol::Literal(ch) => out.push(ch),
 91 |                 RegexSymbol::Group(ref inner) => out.push_str(&generate(inner, r, d)),
 92 |                 RegexSymbol::List(ref list) => out.push(list.generate(r, d)),
 93 |             }
 94 |         }
 95 |         out
 96 |     }
 97 | 
 98 |     /// Produces the string selected by the counter `i`; every repetition is placed in front
 99 |     /// of the ones generated before it
100 |     pub fn generate_inc(&self, i: &mut u64) -> String {
101 |         let count = self.repetitions.map_or(1, NonZeroU32::get);
102 |         let mut out = String::with_capacity(count as usize);
103 |         for _ in 0..count {
104 |             match self.symbol {
105 |                 RegexSymbol::Literal(ch) => out.insert(0, ch),
106 |                 RegexSymbol::Group(ref inner) => out.insert_str(0, &generate_inc(inner, i)),
107 |                 RegexSymbol::List(ref list) => out.insert(0, list.generate_inc(i)),
108 |             }
109 |         }
110 |         out
111 |     }
112 | }
115 | 
116 | /// Builds one random string by concatenating the output of every regex in `regexes`, in order
117 | fn generate<R: Rng, D: Dist<f64>>(regexes: &[Regex], r: &mut R, d: &D) -> String {
118 |     regexes
119 |         .iter()
120 |         .fold(String::with_capacity(regexes.len()), |mut acc, regex| {
121 |             acc.push_str(&regex.generate(&mut *r, d));
122 |             acc
123 |         })
124 | }
124 | 
125 | /// Builds the string selected by the counter `i`; the regexes are visited from last to
126 | /// first, so the rightmost one is the first to consume the counter
127 | fn generate_inc(regexes: &[Regex], i: &mut u64) -> String {
128 |     let mut parts: Vec<String> = Vec::with_capacity(regexes.len());
129 |     for regex in regexes.iter().rev() {
130 |         parts.push(regex.generate_inc(i));
131 |     }
132 |     // `parts` was filled back to front: restore the textual order before joining
133 |     parts.reverse();
134 |     parts.concat()
135 | }
133 | 
134 | /// Parses the inside of a `[...]` list; the opening `[` must already have been consumed
135 | ///
136 | /// A leading `^` marks a complement and `a-z` ranges are expanded into their characters.
137 | /// A `-` with nothing collected before it, or right before the closing `]`, is kept as a
138 | /// literal dash.
139 | fn parse_list(chars: &mut std::str::Chars) -> Regex {
140 |     let mut members = String::new();
141 | 
142 |     let complement = match chars.next().unwrap() {
143 |         '^' => true,
144 |         head => {
145 |             members.push(head);
146 |             false
147 |         }
148 |     };
149 | 
150 |     loop {
151 |         match chars.next() {
152 |             None | Some(']') => break,
153 |             Some('-') => {
154 |                 if members.is_empty() {
155 |                     members.push('-');
156 |                     continue;
157 |                 }
158 | 
159 |                 let end = chars.next().unwrap();
160 |                 if end == ']' {
161 |                     members.push('-');
162 |                     break;
163 |                 }
164 | 
165 |                 // The range starts at the most recently collected character
166 |                 let start = members.pop().unwrap();
167 |                 members.extend(start..=end);
168 |             }
169 |             Some(other) => members.push(other),
170 |         }
171 |     }
172 | 
173 |     let list = List {
174 |         inner: members,
175 |         complement,
176 |     };
177 |     Regex::new(RegexSymbol::List(list))
178 | }
173 | 
174 | /// Parses the count of a `{n}` repetition; the opening `{` must already have been consumed
175 | ///
176 | /// Reads up to and including the closing `}` and panics when the text in between is not a
177 | /// positive integer
178 | fn parse_repetitions(chars: &mut std::str::Chars) -> NonZeroU32 {
179 |     let digits: String = chars.by_ref().take_while(|&ch| ch != '}').collect();
180 | 
181 |     match digits.trim().parse::<u32>() {
182 |         Ok(count) => NonZeroU32::new(count).expect("integer in a '{}' repetition must be > 0"),
183 |         Err(_) => panic!("failed to parse: {} as an unsigned integer", digits),
184 |     }
185 | }
190 | 
191 | /// Parses a parenthesized group up to its closing `)`; the opening `(` must already have
192 | /// been consumed
193 | fn parse_group(chars: &mut std::str::Chars) -> Regex {
194 |     use RegexSymbol::*;
195 | 
196 |     let mut members: Vec<Regex> = Vec::new();
197 | 
198 |     loop {
199 |         let member = match chars.next() {
200 |             None | Some(')') => break,
201 |             Some('{') => {
202 |                 // `{n}` modifies the element parsed just before it
203 |                 members.last_mut().unwrap().repetitions = Some(parse_repetitions(chars));
204 |                 continue;
205 |             }
206 |             // A backslash makes the next character a literal, whatever it is
207 |             Some('\\') => Regex::new(Literal(chars.next().unwrap())),
208 |             Some('[') => parse_list(chars),
209 |             Some('(') => parse_group(chars),
210 |             Some(other) => Regex::new(Literal(other)),
211 |         };
212 |         members.push(member);
213 |     }
214 | 
215 |     Regex::new(Group(members))
216 | }
213 | 
214 | /// Parses a whole regex string into the sequence of top-level elements it is made of
215 | ///
216 | /// Panics with "badly formatted regex!" on a `)` met outside of any group
217 | fn parse_regex(mut chars: std::str::Chars) -> Vec<Regex> {
218 |     use RegexSymbol::*;
219 | 
220 |     let mut elements: Vec<Regex> = Vec::new();
221 | 
222 |     loop {
223 |         let element = match chars.next() {
224 |             None => break,
225 |             Some(')') => panic!("badly formatted regex!"),
226 |             Some('{') => {
227 |                 // `{n}` modifies the element parsed just before it
228 |                 elements.last_mut().unwrap().repetitions = Some(parse_repetitions(&mut chars));
229 |                 continue;
230 |             }
231 |             // A backslash makes the next character a literal, whatever it is
232 |             Some('\\') => Regex::new(Literal(chars.next().unwrap())),
233 |             Some('[') => parse_list(&mut chars),
234 |             Some('(') => parse_group(&mut chars),
235 |             Some(other) => Regex::new(Literal(other)),
236 |         };
237 |         elements.push(element);
238 |     }
239 | 
240 |     elements
241 | }
236 | 
237 | #[derive(Clone, Copy, ValueEnum)] // NOTE(review): clap presumably shows the `///` docs below as --help text; confirm before rewording
238 | /// Distribution to use when randomly generating the characters
239 | enum Distribution {
240 |     /// Uniform distribution
241 |     Uniform, // `main` samples with `Uniform::new(0.0, 1.0)`
242 |     /// Normal distribution
243 |     Normal, // `main` samples with `Normal::new(0.5, 0.25)`
244 |     /// Incremental distribution (VERY SLOW)
245 |     ///
246 |     /// For example, a regex like [0-9]{3} will produce '001', '002', '003', and so on, in order.
247 |     Incremental, // `main` calls `generate_inc` with a loop counter instead of sampling
248 | }
249 | 
250 | // NOTE(review): the `///` comments on this struct and its fields are presumably what clap
251 | // prints as the --help text, so they are written for the end user
252 | #[derive(Parser)]
253 | #[command(version, name = "keygen")]
254 | /// `keygen` generates random strings based on a regex
255 | ///
256 | /// Note: this program does **NOT** implement several regex operations:
257 | ///   * the `*` operator
258 | ///   * the `+` operator
259 | ///   * the `?` operator
260 | ///   * the `.` operator
261 | ///
262 | /// Regexes passed to this program may only make use of `()` groups, `[]` lists of characters and
263 | /// `{n}`, where `n` is an integer. For example, to generate 3 digit numbers, you would use:
264 | /// `[0-9]{3}`
265 | struct Command {
266 |     /// Regex used to generate the strings
267 |     ///
268 |     /// Attention! The OR operator (|) is not implemented!
269 |     regex: String,
270 | 
271 |     /// Number of elements to generate
272 |     #[clap(short, long, default_value = "100")]
273 |     number: u64,
274 | 
275 |     /// Seed used for random number generation
276 |     #[clap(short, long, default_value = "223554")]
277 |     seed: u64,
278 | 
279 |     /// Distribution used in random generation
280 |     #[clap(short, long, default_value = "uniform")]
281 |     distribution: Distribution,
282 | }
281 | 
282 | /// Parses the command line, then writes `number` generated keys to stdout, one per line
283 | fn main() {
284 |     let cmd = Command::parse();
285 | 
286 |     // Seeded generator: the same seed always drives the same sequence of samples
287 |     let mut r = rand::rngs::StdRng::seed_from_u64(cmd.seed);
288 | 
289 |     let regex = parse_regex(cmd.regex.chars());
290 | 
291 |     // Debug builds dump the parsed tree; it goes to stderr so stdout carries only the keys
292 |     #[cfg(debug_assertions)]
293 |     {
294 |         eprintln!("{regex:#?}");
295 |     }
296 | 
297 |     let stdout = stdout();
298 |     let lock = stdout.lock();
299 |     let mut writer = BufWriter::new(lock);
300 |     match cmd.distribution {
301 |         Distribution::Uniform => {
302 |             let d = Uniform::new(0.0, 1.0);
303 |             for _ in 0..cmd.number {
304 |                 writeln!(writer, "{}", generate(&regex, &mut r, &d)).unwrap();
305 |             }
306 |         }
307 |         Distribution::Normal => {
308 |             let d = Normal::new(0.5, 0.25).unwrap();
309 |             for _ in 0..cmd.number {
310 |                 writeln!(writer, "{}", generate(&regex, &mut r, &d)).unwrap();
311 |             }
312 |         }
313 |         Distribution::Incremental => {
314 |             // `generate_inc` consumes the counter, hence a fresh mutable copy per key
315 |             for mut i in 0..cmd.number {
316 |                 writeln!(writer, "{}", generate_inc(&regex, &mut i)).unwrap();
317 |             }
318 |         }
319 |     }
320 | 
321 |     // Flush explicitly: a `BufWriter` ignores write errors when it is flushed on drop
322 |     writer.flush().unwrap();
323 | }
317 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <p align="center">
  2 |   <img alt="Sepe Banner" src="assets/images/SepeBanner.png" width="95%" height="auto"/><br/>
  3 | </p>
  4 | 
  5 | ## Introduction
  6 | 
  7 | This project's goal is to devise, implement, and evaluate techniques for generating optimized hash functions tailored for string keys whose format can be approximated by a regular expression inferred through profiling. These functions will be used to improve the performance of C++'s STL data structures, such as `std::unordered_map`, `std::unordered_set`, `std::unordered_multimap` and `std::unordered_multiset`, in addition to any other `std::hash` specialization for user-defined C++ types.
  8 | 
  9 | ### Dependencies
 10 | 
 11 | These are the most important dependencies for building and running all Sepe programs:
 12 | 
 13 | | Dependency | Version   | Installation Link                            |
 14 | |------------|-----------|----------------------------------------------|
 15 | | clang      | >= 14.0.0 | [llvm.org](https://llvm.org/docs/CMake.html) |
 16 | | CMake      | >= 3.20   | [cmake.org](https://cmake.org/install/)      |
 17 | | Rust       | >= 1.7    | [rust-lang.org](https://www.rust-lang.org/tools/install)|
 18 | | Python     | >= 3.10   | [python.org](https://wiki.python.org/moin/BeginnersGuide/Download)|
 19 | 
 20 | Rust is only necessary if you want to run the experiments. If you are only interested in generating hash functions, only `clang` is necessary.
 21 | 
 22 | ## Quick-Start: Synthesizing functions
 23 | 
 24 | You can follow these two steps to use optimized hash functions generated from this project:
 25 | 
 26 | 1. Obtain your synthesized hash function in one of two ways:
 27 |    1. Using a set of [key examples](#synthesizing-from-key-examples).
 28 |    2. Using the [regular expression of the keys](#synthesizing-from-regular-expression).
 29 | 2. [Integrate](#integrating-the-synthesized-function-into-your-project) the optimized hash function into your code.
 30 | 
 31 | ### Synthesizing from Key Examples
 32 | 
 33 | To synthesize hash functions from key examples, you only need to create a file containing a non-exhaustive but representative key set. 
 34 | 
 35 | Supposing your key strings are saved in the `txt-file-with-strings` file, you can run the following command:
 36 | 
 37 | ```sh
 38 | ./bin/keysynth "$(./bin/keybuilder < txt-file-with-strings)"
 39 | ```
 40 | 
 41 | ### Synthesizing from Regular Expression
 42 | 
 43 | To build the hash function from the regular expression of your keys, use:
 44 | 
 45 | ```sh
 46 | make
 47 | ./scripts/make_hash_from_regex.sh [REGEX]
 48 | ```
 49 | 
 50 | Example: *Generating a custom hash function for IPV4 keys*
 51 | ```sh
 52 | ./scripts/make_hash_from_regex.sh "(([0-9]{3})\.){3}[0-9]{3}" #or single quotes in zshell
 53 | ```
 54 | 
 55 | See more about regular expressions in the [keygen](#keygen) section.
 56 | 
 57 | ### Integrating the Synthesized function into your project
 58 | 
 59 | Suppose your code has a C++ STL std::unordered_map with IPV4 std::string as keys and int as values.
 60 | 
 61 | ```cpp
 62 | void yourCode(void){
 63 |         std::unordered_map<std::string, int, synthesizedOffXorHash> map;
 64 |         map["255.255.255.255"] = 42;
 65 |         // more code that uses map object
 66 | }
 67 | ```
 68 | 
 69 | After running `./scripts/make_hash_from_regex.sh "(([0-9]{3})\.){3}[0-9]{3}"`, you should get the following output with two function options:
 70 | 
 71 | ```cpp
 72 | // Helper function, include in your codebase:
 73 | inline static uint64_t load_u64_le(const char* b) {
 74 |         uint64_t Ret;
 75 |         // This is a way for the compiler to optimize this func to a single movq instruction
 76 |         memcpy(&Ret, b, sizeof(uint64_t));
 77 |         return Ret;
 78 | }
 79 | // Pext Hash Function:
 80 | struct synthesizedPextHash {
 81 |     // Omitted for brevity in this code snippet
 82 | };
 83 | // OffXor Hash Function:
 84 | struct synthesizedOffXorHash {
 85 |         std::size_t operator()(const std::string& key) const {
 86 |                 const std::size_t hashable0 = load_u64_le(key.c_str()+0);
 87 |                 const std::size_t hashable1 = load_u64_le(key.c_str()+7);
 88 |                 size_t tmp0 = hashable0 ^ hashable1;
 89 |                 return tmp0;
 90 |         }
 91 | };
 92 | ```
 93 | 
 94 | *If in doubt, we always recommend using the synthesizedOffXorHash variant, according to our benchmarks.*
 95 | Copy and paste the desired hash function, in this example, `synthesizedOffXorHash`, into your codebase and then add its name as the third argument in the std::unordered_map template.
 96 | 
 97 | ```cpp
 98 | inline static uint64_t load_u64_le(const char* b) {
 99 |         uint64_t Ret;
100 |         // This is a way for the compiler to optimize this func to a single movq instruction
101 |         memcpy(&Ret, b, sizeof(uint64_t));
102 |         return Ret;
103 | }
104 | 
105 | struct synthesizedOffXorHash {
106 |         std::size_t operator()(const std::string& key) const {
107 |                 const std::size_t hashable0 = load_u64_le(key.c_str()+0);
108 |                 const std::size_t hashable1 = load_u64_le(key.c_str()+7);
109 |                 size_t tmp0 = hashable0 ^ hashable1;
110 |                 return tmp0;
111 |         }
112 | };
113 | 
114 | void yourCode(void){
115 |         std::unordered_map<std::string, int, synthesizedOffXorHash> map;
116 |         map["255.255.255.255"] = 42;
117 |         // more code that uses map object
118 | }
119 | ```
120 | 
121 | ## Quick-Start: Benchmarking
122 | 
123 | Building and running with default parameters:
124 | 
125 | ```sh
126 | ./scripts/install_abseil.sh # necessary for keyuser
127 | make && make benchmark
128 | ./bin/sepe-runner [REGEXES]
129 | ```
130 | Valid regexes are listed in the `Regexes.toml` file.
131 | 
132 | Example: *Benchmarking all IPV4 hash functions with default parameters*
133 | ```
134 | ./bin/sepe-runner IPV4
135 | ./scripts/keyuser_interpreter.py -p IPV4_performance.csv
136 | ```
137 | 
138 | For more options, see the [sepe-runner](#sepe-runner) section.
139 | 
140 | ## Sepe Components
141 | 
142 | ### keygen
143 | 
144 | `keygen` writes n random keys, generated from a regex, to standard output.
145 | 
146 | Not all valid regexes are accepted, since we did not implement the `OR` (`|`), `Kleene Star` (`*`), `Plus` (`+`), `Optional` (`?`), and `DOT` (`.`) operators.
147 | 
148 | ```sh
149 | ./bin/keygen REGEX [-n number_of_elements] [-s seed] [-d distribution]
150 | ```
151 | 
152 | Example: *Generating 2 random IPV4 keys with seed 223554*
153 | 
154 | ```sh
155 | ./bin/keygen "(([0-9]{3})\.){3}[0-9]{3}" -n 2 -s 223554
156 | 313.797.178.390
157 | 445.982.868.308
158 | ```
159 | 
160 | For more options, do:
161 | ```sh
162 | ./bin/keygen --help
163 | ```
164 | 
165 | ### keyuser
166 | 
167 | *We recommend using keyuser via [sepe-runner](#sepe-runner)*
168 | 
169 | `keyuser` benchmarks custom hash functions with keys received from standard input.
170 | 
171 | ```sh
172 | <standard_output_keys> | ./bin/keyuser [hashes] <num_operations> <insert> <search> <elimination> [seed] [verbose]
173 | ```
174 | 
175 | **If no [hashes] are specified, only generic hash functions are executed**
176 | 
177 | Example: *Benchmarking 2 IPV4 keys with 10 total operations using the STDHashBin and PextIPV4 hash functions: 50% insertions, 30% searches, and 20% eliminations.*
178 | 
179 | ```sh
180 | ./bin/keygen "(([0-9]{3})\.){3}[0-9]{3}" -n 2 -s 223554 | ./bin/keyuser --hashes STDHashBin PextIPV4 -n 10 -i 50 -s 30 -e 20
181 | ```
182 | 
183 | For more options, do:
184 | ```sh
185 | ./bin/keyuser --help
186 | ```
187 | 
188 | ### keybuilder
189 | 
190 | `keybuilder` creates a regex from a series of strings passed through standard input, separated by a new line.
191 | 
192 | ```sh
193 | ./bin/keybuilder < txt-file-with-strings
194 | ```
195 | 
196 | ### keysynth
197 | 
198 | `keysynth` synthesizes the hash functions based on the regex generated by the `keybuilder`. It is picky about the regex's format, so it is not recommended to hand-write it. Use `keybuilder` instead.
199 | 
200 | ```sh
201 | ./bin/keysynth "$(./bin/keybuilder < txt-file-with-strings)"
202 | ```
203 | 
204 | ### sepe-runner
205 | 
206 | `sepe-runner` is a helper program that connects the other programs together as needed.
207 | 
208 | `Regexes.toml` is a configuration file containing all accepted `sepe-runner` regular expressions and their associated Hash Functions. *Changing this file also requires changing `keyuser`.*
209 | 
210 | ```sh
211 | ./bin/sepe-runner Regex-entry-in-Regexes.toml
212 | ```
213 | Some relevant parameters are:
214 | - `-k, --keys`: Number of keys to generate
215 | - `-o, --operations`: Number of operations to run
216 | - `-i, --insert`: Percentage of insertion operations
217 | - `-s, --search`: Percentage of search operations
218 | - `-e, --elimination`: Percentage of elimination operations
219 | - `--histogram`: Generate the distribution histogram for the given regex, do not run experiments
220 | 
221 | Example: *Running the IPV4 benchmark*
222 | 
223 | ```sh
224 | ./bin/sepe-runner IPV4
225 | ```
226 | 
227 | For more options, do:
228 | ```sh
229 | ./bin/sepe-runner --help
230 | ```
231 | 
232 | ## Helper Scripts
233 | 
234 | The `scripts` folder contains some helper scripts that may be useful for some people:
235 | 
236 |   * `align_csv.sh` - pretty prints `keyuser`'s generated `.csv` files for easier analysis
237 |   * `benchmark.sh` - helper to run many benchmarks at once
238 |   * `install_abseil.sh` - installs the abseil library locally. Necessary for `keyuser`
239 |   * `make_hash_from_regex.sh` - creates a hash function from a user-defined regex
240 |   * `keyuser_interpreter.py` - interprets the results generated from `keyuser`'s benchmarks
241 | 
242 | ### Using `keyuser_interpreter.py`
243 | 
244 |   This script is used to help interpret the output of `keyuser`. It can plot graphs, generate tables, and perform statistical analysis.
245 | 
246 |   The most relevant configurations are:
247 | 
248 | ```
249 | -d DISTRIBUTION, --distribution DISTRIBUTION
250 |                       Name of the distribution file to interpret. Exclusive with -p option.
251 | -p [PERFORMANCE ...], --performance [PERFORMANCE ...]
252 |                       Name of the CSV performance files to interpret. Exclusive with -d option.
253 | -pg, --plot-graph     Option to plot the results in graphs.
254 | -hf [HASH_FUNCTIONS ...], --hash-functions [HASH_FUNCTIONS ...]
255 |                       Name of the hash functions to analyze.
256 | ```
257 | 
258 | Example for interpreting performance using IPV4 keys:
259 | ```sh
260 | ./bin/sepe-runner IPV4 && ./scripts/keyuser_interpreter.py -p IPV4_performance.csv
261 | ```
262 | 
263 | Example for interpreting hash distribution using IPV4 keys:
264 | ```sh
265 | ./bin/sepe-runner --histogram IPV4 && ./scripts/keyuser_interpreter.py -d IPV4_distribution.py
266 | ```
267 | 
268 | ### Reproducing Experiments
269 | 
270 | The [artifact branch](https://github.com/lac-dcc/sepe/tree/artifact) reproduces the research questions from the paper.
271 | All scripts to reproduce the RQs are available in a Docker container.
272 | RQ1 and RQ2 can be reproduced with a single script, `rq1_rq2_benchmark.sh`.
273 | All other RQs have an individual script, `rq_benchmark.sh`.
274 | 
275 | 


--------------------------------------------------------------------------------
/src/keyuser/src/benchmarks.cpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * @file benchmarks.cpp
  3 |  * @brief Implementation of benchmarking functions.
  4 |  * 
  5 |  * This file includes the implementation of functions for executing benchmarks.
  6 |  * The benchmarks are performed on a set of keys and according to a set of parameters.
  7 |  */
  8 | 
  9 | #include "benchmarks.hpp"
 10 | 
 11 | #include <chrono>
 12 | #include <algorithm>
 13 | #include <random>
 14 | /** Runs a randomized mix of insert/search/elimination operations on `bench`, drawing keys from `keys`. */
 15 | void executeInterweaved(Benchmark* bench,
 16 |                         const std::vector<std::string>& keys,
 17 |                         const BenchmarkParameters& args)
 18 | {
 19 |     // Seed random number generator
 20 |     srand(args.seed);
 21 | 
 22 |     // Interweaved execution mode parameters
 23 |     int numInsert = (args.insert * args.numOperations) / 100;
 24 |     // Nothing to sample from; this also avoids the modulo-by-zero in `% keys.size()` below
 25 |     if ( keys.empty() ) return;
 26 |     if ( args.distribution == "normal" ) {
 27 |         // Binomial distribution with n = keys.size() and p = 0.5, seeded so `args.seed` also controls key choice
 28 |         std::default_random_engine generator(static_cast<std::default_random_engine::result_type>(args.seed));
 29 |         std::binomial_distribution<int> distribution(keys.size(), 0.5);
 30 | 
 31 |         // First, insert 50% of the numInserts
 32 |         for(int j = 0; j < numInsert/2; j++){
 33 |             int randomKey = distribution(generator) % keys.size();
 34 |             bench->insert(keys[randomKey]);
 35 |         }
 36 |         for(int j = 0; j < (args.numOperations-(numInsert/2)); j++){
 37 |             int randomKey = distribution(generator) % keys.size();
 38 |             int randomOp = rand() % 100;
 39 |             if(randomOp < args.insert){
 40 |                 bench->insert(keys[randomKey]);
 41 |             }else if(randomOp < args.insert + args.search){
 42 |                 bench->search(keys[randomKey]);
 43 |             }else{
 44 |                 bench->elimination(keys[randomKey]);
 45 |             }
 46 |         }
 47 |     } else if ( args.distribution == "uniform" ) {
 48 |         // First, insert 50% of the numInserts
 49 |         for(int j = 0; j < numInsert/2; j++){
 50 |             int randomKey = rand() % keys.size();
 51 |             bench->insert(keys[randomKey]);
 52 |         }
 53 |         for(int j = 0; j < (args.numOperations-(numInsert/2)); j++){
 54 |             int randomKey = rand() % keys.size();
 55 |             int randomOp = rand() % 100;
 56 |             if(randomOp < args.insert){
 57 |                 bench->insert(keys[randomKey]);
 58 |             }else if(randomOp < args.insert + args.search){
 59 |                 bench->search(keys[randomKey]);
 60 |             }else{
 61 |                 bench->elimination(keys[randomKey]);
 62 |             }
 63 |         }
 64 |     }
 65 | 
 66 | }
 67 | /** Runs all insertions, then all searches, then all eliminations on `bench`, drawing keys from `keys`. */
 68 | void executeBatched(Benchmark* bench,
 69 |                     const std::vector<std::string>& keys,
 70 |                     const BenchmarkParameters& args)
 71 | {
 72 |     // Seed random number generator
 73 |     srand(args.seed);
 74 | 
 75 |     // Batch execution mode parameters
 76 |     int numInsert = (args.insert * args.numOperations) / 100;
 77 |     int numSearch = (args.search * args.numOperations) / 100;
 78 |     int numElimination = (args.elimination * args.numOperations) / 100;
 79 |     if ( keys.empty() ) return; // nothing to sample from; avoids the modulo-by-zero below
 80 |     if ( args.distribution == "normal" ) {
 81 |         // Binomial distribution with n = keys.size() and p = 0.5, seeded so `args.seed` also controls key choice
 82 |         std::default_random_engine generator(static_cast<std::default_random_engine::result_type>(args.seed));
 83 |         std::binomial_distribution<int> distribution(keys.size(), 0.5);
 84 |         for(int j = 0; j < numInsert; j++){
 85 |             int randomKey = distribution(generator) % keys.size();
 86 |             bench->insert(keys[randomKey]);
 87 |         }
 88 |         for(int j = 0; j < numSearch; j++){
 89 |             int randomKey = distribution(generator) % keys.size();
 90 |             bench->search(keys[randomKey]);
 91 |         }
 92 |         for(int j = 0; j < numElimination; j++){
 93 |             int randomKey = distribution(generator) % keys.size();
 94 |             bench->elimination(keys[randomKey]);
 95 |         }
 96 |     } else if ( args.distribution == "uniform" ) {
 97 |         for(int j = 0; j < numInsert; j++){
 98 |             int randomKey = rand() % keys.size();
 99 |             bench->insert(keys[randomKey]);
100 |         }
101 |         for(int j = 0; j < numSearch; j++){
102 |             int randomKey = rand() % keys.size();
103 |             bench->search(keys[randomKey]);
104 |         }
105 |         for(int j = 0; j < numElimination; j++){
106 |             int randomKey = rand() % keys.size();
107 |             bench->elimination(keys[randomKey]);
108 |         }
109 |     }
110 | }
111 | /** Prints one line with the container name, hash name, elapsed seconds and bucket collision count of `bench`. */
112 | void printVerbose(Benchmark& bench, const std::chrono::duration<double>& elapsed_seconds){
113 |     printf( "\t\t%-25s %25s    Elapsed time: %f (s)    "
114 |             "Collision Count (Buckets): %d\n", 
115 |             bench.getContainerName().c_str(), 
116 |             bench.getHashName().c_str(), 
117 |             elapsed_seconds.count(),
118 |             bench.calculateCollisionCountBuckets()); // NOTE(review): printed with %d — confirm this returns int
119 | }
120 | /** Prints one CSV data row: execution mode, pre-formatted argument columns, container, hash function, time, collisions. */
121 | static void reportMetricsCSV(
122 |                                  const char* execMode,
123 |                                  const char* argsString, // already comma-separated; emitted verbatim as several columns
124 |                                  const char* containerName,
125 |                                  const char* hashFuncName,
126 |                                  const float execTime, // callers pass a double duration count, narrowed to float here
127 |                                  const int collisions)
128 | {
129 |     printf( "%s,%s,%s,%s,%f,%d\n",
130 |             execMode,
131 |             argsString,
132 |             containerName,
133 |             hashFuncName,
134 |             execTime,
135 |             collisions);
136 | }
137 | /** Prints the CSV header, then runs every benchmark `args.repetitions` times in Interweaved mode and again in Batched mode (one CSV row per run). */
138 | void benchmarkExecutor(const std::vector<Benchmark*>& benchmarks,
139 |                        const std::vector<std::string>& keys,
140 |                        const BenchmarkParameters& args)
141 | {
142 | 
143 |     // Init CSV File
144 |     printf("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n",
145 |         "Execution Mode",
146 |         "Key Distribution",
147 |         "Num Operations",
148 |         "Num Keys",
149 |         "Insertions (%)",
150 |         "Searches (%)",
151 |         "Eliminatons(%)",
152 |         "Hash Container",
153 |         "Hash Function",
154 |         "Execution Time (s)",
155 |         "Collision Count");
156 |     // Stack buffer + snprintf: a long field is truncated instead of overflowing; %zu matches size_t
157 |     char argsString[128];
158 |     snprintf(argsString, sizeof(argsString), "%s,%d,%zu,%d,%d,%d",
159 |                         args.distribution.c_str(),
160 |                         args.numOperations,
161 |                         keys.size(),
162 |                         args.insert,
163 |                         args.search,
164 |                         args.elimination);
165 | 
166 |     // Execution modes are hard coded since we do not expect to add new ones or modify existing ones
167 |     for(int r=0; r < args.repetitions; ++r){
168 |         for (const auto& bench : benchmarks){
169 | 
170 |             // Execute benchmark (steady_clock is monotonic, so wall-clock adjustments cannot skew the interval)
171 |             auto start = std::chrono::steady_clock::now();
172 |             executeInterweaved(bench, keys, args);
173 |             auto end = std::chrono::steady_clock::now();
174 |             std::chrono::duration<double> elapsed_seconds = end-start;
175 | 
176 |             reportMetricsCSV("Interweaved",
177 |                              argsString,
178 |                              bench->getContainerName().c_str(),
179 |                              bench->getHashName().c_str(),
180 |                              elapsed_seconds.count(),
181 |                              bench->calculateCollisionCountBuckets());
182 | 
183 |             bench->clearContainer();
184 | 
185 |         }
186 |     }
187 | 
188 |     for(int r=0; r < args.repetitions; ++r){
189 |         for (const auto& bench : benchmarks){
190 | 
191 |             // Execute benchmark (steady_clock is monotonic, so wall-clock adjustments cannot skew the interval)
192 |             auto start = std::chrono::steady_clock::now();
193 |             executeBatched(bench, keys, args);
194 |             auto end = std::chrono::steady_clock::now();
195 |             std::chrono::duration<double> elapsed_seconds = end-start;
196 | 
197 |             reportMetricsCSV("Batched",
198 |                              argsString,
199 |                              bench->getContainerName().c_str(),
200 |                              bench->getHashName().c_str(),
201 |                              elapsed_seconds.count(),
202 |                              bench->calculateCollisionCountBuckets());
203 | 
204 |             bench->clearContainer();
205 | 
206 |         }
207 |     }
208 | 
209 |     // No cleanup needed: argsString is a stack buffer
210 | 
211 | }
212 | /** Prints, as Python/numpy source, the sorted hash values each distinct hash function yields for `keys`, then per-function collision counts as `#` comments. */
213 | void testDistribution(const std::vector<Benchmark*>& benchmarks,
214 |                        const std::vector<std::string>& keys){
215 | 
216 |     std::unordered_set<std::string> hashFuncExecuted;
217 |     std::unordered_map<std::string,size_t> hashFuncCollisions;
218 |     printf("import numpy as np\n");
219 |     printf("distributions = {}\n");
220 |     for (const auto& bench : benchmarks){
221 |         if(hashFuncExecuted.find(bench->getHashName()) != hashFuncExecuted.end()){
222 |             continue; // several benchmarks may share one hash function; report it only once
223 |         }
224 | 
225 |         hashFuncExecuted.insert(bench->getHashName());
226 | 
227 |         std::vector<size_t> buckets;
228 |         auto hashFunc = bench->getHashFunction();
229 |         buckets.reserve(keys.size()); // final size is known, so allocate once
230 |         for(const auto& key : keys){
231 |             size_t hashID = hashFunc(key);
232 |             buckets.push_back(hashID);
233 |         }
234 | 
235 |         std::sort(buckets.begin(), buckets.end());
236 |         printf("distributions['array_%s'] = np.array([", bench->getHashName().c_str());
237 |         for (size_t i = 0; i < buckets.size(); ++i) {
238 |             printf("%zu,",buckets[i]); // %zu is the portable conversion for size_t
239 |         }
240 |         printf("])\n");
241 |         // Count number of replicated values in the buckets
242 |         auto newEnd = std::unique(buckets.begin(), buckets.end());
243 |         size_t numRepeated = buckets.end() - newEnd;
244 |         hashFuncCollisions[bench->getHashName()] = numRepeated;
245 |     }
246 | 
247 |     for ( const auto & [hashName, collisions] : hashFuncCollisions ) {
248 |         printf("# Hash Function: %s, Collisions: %zu\n", hashName.c_str(), collisions);
249 |     }
250 | }
251 | /** Times raw hashing of every key (no container involved), once per repetition, for each distinct hash function; prints CSV rows. */
252 | void testHashPerformance(const std::vector<Benchmark*>& benchmarks,
253 |                          const std::vector<std::string>& keys,
254 |                          const BenchmarkParameters& args){
255 | 
256 |     std::unordered_set<std::string> hashFuncExecuted;
257 |     printf("Hash Function,Elapsed Time (seconds)\n");
258 |     for (const auto& bench : benchmarks){
259 |         if(hashFuncExecuted.find(bench->getHashName()) != hashFuncExecuted.end()){
260 |             continue;
261 |         }
262 |         hashFuncExecuted.insert(bench->getHashName());
263 | 
264 |         for(int r=0; r < args.repetitions; ++r){
265 |             auto hashFunc = bench->getHashFunction();
266 |             volatile size_t sink = 0; // observable store: keeps the compiler from discarding the hash calls
267 |             auto start = std::chrono::steady_clock::now();
268 |             for(const auto& key : keys){
269 |                 sink = hashFunc(key);
270 |             }
271 |             auto end = std::chrono::steady_clock::now();
272 |             std::chrono::duration<double> elapsed_seconds = end-start;
273 |             printf("%s,%f\n", bench->getHashName().c_str(), elapsed_seconds.count());
274 |         }
275 |     }
276 | }
277 | /** Deletes every Benchmark the vector points to and leaves the vector empty. */
278 | void freeBenchmarks(std::vector<Benchmark*>& benchmarks){
279 |     for(auto bench : benchmarks){ delete bench; }
280 |     // Drop the dangling pointers so a second call (or a later iteration) cannot touch freed memory
281 |     benchmarks.clear();
282 | }
283 | 


--------------------------------------------------------------------------------
/src/sepe-runner/src/main.rs:
--------------------------------------------------------------------------------
  1 | use std::{fs, io::Write, os::unix::ffi::OsStringExt, str::FromStr};
  2 | 
  3 | use clap::{Parser, ValueEnum};
  4 | use toml::Table;
  5 | 
  6 | const TOML_FILENAME: &str = "Regexes.toml";
  7 | const KEYGEN: &str = "keygen";
  8 | const KEYUSER: &str = "keyuser";
  9 | const KEYUSER_DEBUG: &str = "keyuser-debug";
 10 | const KEYBUILDER: &str = "keybuilder";
 11 | 
 12 | #[derive(Debug, Clone, Copy, ValueEnum)]
 13 | /// Distribution to use when randomly generating the characters
 14 | enum Distribution { // main forwards the chosen variant to keygen's `-d` option via `as_str()`
 15 |     /// Uniform distribution
 16 |     Uniform,
 17 |     /// Normal distribution
 18 |     Normal,
 19 |     /// Incremental distribution (VERY SLOW)
 20 |     ///
 21 |     /// For example, a regex like [0-9]{3} will produce '001', '002', '003', and so on, in order.
 22 |     Incremental,
 23 | }
 24 | // Lowercase textual name of each variant, as handed to keygen on its command line.
 25 | impl Distribution {
 26 |     fn as_str(&self) -> &'static str {
 27 |         match *self {
 28 |             Self::Uniform => "uniform",
 29 |             Self::Normal => "normal",
 30 |             Self::Incremental => "incremental",
 31 |         }
 32 |     }
 33 | }
 34 | 
 35 | /// Helper program that stitches together the other programs in this repository
 36 | ///
 37 | /// It is meant to facilitate testing and benchmarking during development and research.
 38 | ///
 39 | /// **End users who only want to synthesize some functions do not have to interact with this**.
 40 | /// They should just use keybuilder and keysynth directly, instead
 41 | #[derive(Parser, Debug)]
 42 | #[command(author, version, name = "sepe-runner")]
 43 | struct Command {
 44 |     /// Number of keys to generate
 45 |     ///
 46 |     /// This is set to 10000 when `--histogram` is set or if distribution is 'incremental'
 47 |     #[clap(
 48 |         short,
 49 |         long,
 50 |         default_value = "1000000",
 51 |         default_value_if("histogram", "true", "10000"),
 52 |         default_value_if("distribution", "incremental", "10000")
 53 |     )]
 54 |     keys: u64, // passed to keygen as `-n`
 55 | 
 56 |     /// Distribution used in random generation
 57 |     #[clap(short, long, default_value = "uniform")]
 58 |     distribution: Distribution, // passed to keygen as `-d`
 59 | 
 60 |     /// Key generator random number generator seed
 61 |     #[clap(long, default_value = "223554")]
 62 |     keygen_seed: u64, // passed to keygen as `-s`
 63 | 
 64 |     /// Whether to run keyuser in debug mode
 65 |     #[clap(long, default_value = "false")]
 66 |     debug: bool, // selects the `keyuser-debug` binary instead of `keyuser`
 67 | 
 68 |     /// Key user random number generator seed
 69 |     #[clap(long, default_value = "9764096")]
 70 |     keyuser_seed: u64,
 71 | 
 72 |     /// Number of repetitions to forward to keyuser
 73 |     #[clap(short, long, default_value = "1")]
 74 |     repetitions: u64,
 75 | 
 76 |     /// Number of operations to run
 77 |     #[clap(short, long, default_value = "10000")]
 78 |     operations: u64, // passed to keyuser as `-n`
 79 | 
 80 |     /// Percentage of insertion operations. **Must be in [0; 1] range**
 81 |     #[clap(short, long, default_value = "0.5")]
 82 |     insert: f64, // multiplied by 100 and truncated to an integer for keyuser's `-i`
 83 | 
 84 |     /// Percentage of search operations. **Must be in [0; 1] range**
 85 |     #[clap(short, long, default_value = "0.3")]
 86 |     search: f64, // multiplied by 100 and truncated to an integer for keyuser's `-s`
 87 | 
 88 |     /// Percentage of elimination operations. **Must be in [0; 1] range**
 89 |     #[clap(short, long, default_value = "0.2")]
 90 |     elimination: f64, // multiplied by 100 and truncated to an integer for keyuser's `-e`
 91 | 
 92 |     /// Prints configuration and runs keyuser in verbose mode
 93 |     #[clap(short, long, default_value = "false")]
 94 |     verbose: bool,
 95 | 
 96 |     /// Suffix of output csv files.
 97 |     ///
 98 |     /// The prefix is the Regex's entry name in the Regexes.toml file.
 99 |     /// When `--histogram` is set, the default value is '_distribution.py'
100 |     #[clap(
101 |         long,
102 |         default_value = "_performance.csv",
103 |         default_value_if("histogram", "true", "_distribution.py")
104 |     )]
105 |     outfile: String,
106 | 
107 |     /// Generate the synthesized function for the given regex, do not run experiments
108 |     #[clap(long)]
109 |     synthesize: bool, // when set, keygen's output is piped to keybuilder and its result is handed to keysynth
110 | 
111 |     /// Generate the distribution histogram for the given regex, do not run experiments
112 |     #[clap(long)]
113 |     histogram: bool,
114 | 
115 |     /// Measure the performance of the hash functions without containers
116 |     #[clap(long)]
117 |     hash_performance: bool,
118 | 
119 |     /// Whether to generate the keys incrementally, rather than randomly (VERY SLOW)
120 |     #[clap(long)]
121 |     incremental_generation: bool, // NOTE(review): overlaps with `--distribution incremental` — confirm it is still read
122 | 
123 |     /// Regexes we will benchmark, defined in Regexes.toml
124 |     ///
125 |     /// You can send multiple Regexes, separated by spaces
126 |     regexes: Vec<String>, // the special value "ALL" expands to every entry in Regexes.toml
127 | }
128 | /// Searches the executable's directory, then each ancestor directory in turn, for a file named `filename`; panics if none is found.
129 | fn find_file(filename: &str) -> fs::DirEntry {
130 |     let mut dir = std::env::current_exe().expect("couldn't find out current executable path");
131 |     dir.pop(); // strip the executable's own file name, leaving its directory
132 |     loop {
133 |         let entries = dir
134 |             .read_dir()
135 |             .unwrap_or_else(|e| panic!("failed to read directory {:#?}, {e}", dir));
136 |         // Only accept files: a same-named directory (e.g. the `keygen` source folder) can be neither read nor executed
137 |         match entries
138 |             .flatten()
139 |             .find(|entry| entry.file_name().eq(filename) && entry.path().is_file())
140 |         {
141 |             Some(entry) => return entry,
142 |             None => {
143 |                 dir = dir
144 |                     .parent()
145 |                     .unwrap_or_else(|| panic!("failed to find {} file", filename))
146 |                     .into()
147 |             }
148 |         }
149 |     }
150 | }
151 | /// Locates the regex configuration file with `find_file` and parses it into a TOML table, panicking on any failure.
152 | fn read_toml_file() -> Table {
153 |     let toml_entry = find_file(TOML_FILENAME);
154 |     let raw = fs::read(toml_entry.path())
155 |         .unwrap_or_else(|e| panic!("failed to read {:#?} file: {e}", toml_entry.file_name()));
156 |     let text = std::str::from_utf8(&raw)
157 |         .unwrap_or_else(|e| panic!("failed to decode {:#?} file's utf8: {e}", toml_entry.file_name()));
158 | 
159 |     Table::from_str(text).expect("failed to parse toml file's content")
160 | }
161 | 
162 | fn main() {
163 |     let mut cmd = Command::parse();
164 | 
165 |     let regex_table = read_toml_file();
166 | 
167 |     if cmd.regexes.contains(&"ALL".to_string()) {
168 |         cmd.regexes.clear();
169 |         cmd.regexes = regex_table.keys().map(|key| key.to_string()).collect();
170 |     }
171 | 
172 |     for cmd_regex in cmd.regexes {
173 |         let values = match regex_table.get(&cmd_regex) {
174 |             Some(r) => r,
175 |             None => {
176 |                 eprintln!("there is {} entry in {}", cmd_regex, TOML_FILENAME);
177 |                 continue;
178 |             }
179 |         };
180 | 
181 |         let regex = values
182 |             .get("regex")
183 |             .unwrap_or_else(|| panic!("{} entry has no 'regex' value!", cmd_regex))
184 |             .as_str()
185 |             .unwrap_or_else(|| panic!("{} regex entry must be a string!", cmd_regex));
186 | 
187 |         let hashes = values
188 |             .get("hashes")
189 |             .unwrap_or_else(|| panic!("{} entry has no 'hashes' list!", cmd_regex))
190 |             .as_array()
191 |             .unwrap_or_else(|| panic!("{} hashes entry must be a list of strings!", cmd_regex));
192 | 
193 |         let keygen = find_file(KEYGEN);
194 |         let keyuser = find_file(if cmd.debug { KEYUSER_DEBUG } else { KEYUSER });
195 | 
196 |         use std::process::Command as Cmd;
197 | 
198 |         let keygen_cmd = Cmd::new(keygen.path())
199 |             .stdout(std::process::Stdio::piped())
200 |             .arg(regex)
201 |             .arg("-n")
202 |             .arg(format!("{}", cmd.keys))
203 |             .arg("-s")
204 |             .arg(format!("{}", cmd.keygen_seed))
205 |             .arg("-d")
206 |             .arg(cmd.distribution.as_str())
207 |             .spawn()
208 |             .expect("failed to spawn keygen command");
209 | 
210 |         let keygen_out = keygen_cmd.stdout.expect("failed to open keygen stdout");
211 | 
212 |         if cmd.synthesize {
213 |             let mut keybuilder_output = Cmd::new(find_file(KEYBUILDER).path())
214 |                 .stdin(std::process::Stdio::from(keygen_out))
215 |                 .output()
216 |                 .expect("failed to spawn keybuilder!");
217 | 
218 |             keybuilder_output.stdout.pop();
219 | 
220 |             let args = std::ffi::OsString::from_vec(keybuilder_output.stdout);
221 |             let args = args.to_string_lossy();
222 |             let args: Box<[String]> = args.split_whitespace().map(|e| e.to_string()).collect();
223 |             match Cmd::new(find_file("keysynth").path())
224 |                 .args(args.iter())
225 |                 .spawn()
226 |                 .expect("failed to spawn keysynth!")
227 |                 .wait()
228 |             {
229 |                 Ok(exit_status) => {
230 |                     if !exit_status.success() {
231 |                         eprintln!("ERROR: keysynth failed!");
232 |                     }
233 |                 }
234 |                 Err(e) => {
235 |                     eprintln!("ERROR: keysynth couldn't run to completion: {e}!");
236 |                 }
237 |             }
238 | 
239 |             continue;
240 |         }
241 | 
242 |         let mut keyuser_cmd = Cmd::new(keyuser.path());
243 | 
244 |         keyuser_cmd
245 |             .stdin(std::process::Stdio::from(keygen_out))
246 |             .arg("-i")
247 |             .arg(format!("{}", (cmd.insert * 100.0) as u64))
248 |             .arg("-s")
249 |             .arg(format!("{}", (cmd.search * 100.0) as u64))
250 |             .arg("-e")
251 |             .arg(format!("{}", (cmd.elimination * 100.0) as u64))
252 |             .arg("-n")
253 |             .arg(format!("{}", cmd.operations))
254 |             .arg("-seed")
255 |             .arg(format!("{}", cmd.keyuser_seed))
256 |             .arg("-r")
257 |             .arg(format!("{}", cmd.repetitions))
258 |             .arg("--distribution")
259 |             .arg(cmd.distribution.as_str());
260 | 
261 |         if cmd.verbose {
262 |             keyuser_cmd.arg("--verbose");
263 |         }
264 | 
265 |         if cmd.histogram {
266 |             keyuser_cmd.arg("--test-distribution");
267 |         }
268 | 
269 |         if cmd.hash_performance {
270 |             keyuser_cmd.arg("--hash-performance");
271 |         }
272 | 
273 |         keyuser_cmd.arg("--hashes");
274 |         for hash in hashes {
275 |             keyuser_cmd.arg(hash.as_str().unwrap_or_else(|| {
276 |                 panic!("{} hashes entry must be a list of strings!", cmd_regex)
277 |             }));
278 |         }
279 | 
280 |         println!("\nExecuting {} regex: {}", cmd_regex, regex);
281 |         if cmd.verbose {
282 |             println!("    Configuration:");
283 |             println!("        Debug: {}", cmd.debug);
284 |             println!("        Keys Generated:       {}", cmd.keys);
285 |             println!("        Number of Operations: {}", cmd.operations);
286 |             println!(
287 |                 "        [Insertion, Search, Elimination ] Percentages: [{}, {}, {}]",
288 |                 cmd.insert, cmd.search, cmd.elimination
289 |             );
290 |         }
291 | 
292 |         let keyuser_out = keyuser_cmd
293 |             .output()
294 |             .expect("failed to spawn keyuser command");
295 | 
296 |         if !keyuser_out.status.success() {
297 |             eprintln!("        !!!FAILED: {}!!!", keyuser_out.status);
298 |         } else {
299 |             let filename = cmd_regex + &cmd.outfile;
300 |             let mut outfile =
301 |                 std::fs::File::create(filename).expect("failed to create output file!");
302 |             outfile.write_all(&keyuser_out.stdout).unwrap();
303 |         }
304 |         std::io::stderr().write_all(&keyuser_out.stderr).unwrap();
305 |     }
306 | }
307 | 


--------------------------------------------------------------------------------
/src/keyuser/src/google-hashes/farm.hpp:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2014 Google, Inc.
  2 | //
  3 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  4 | // of this software and associated documentation files (the "Software"), to deal
  5 | // in the Software without restriction, including without limitation the rights
  6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  7 | // copies of the Software, and to permit persons to whom the Software is
  8 | // furnished to do so, subject to the following conditions:
  9 | //
 10 | // The above copyright notice and this permission notice shall be included in
 11 | // all copies or substantial portions of the Software.
 12 | //
 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 19 | // THE SOFTWARE.
 20 | //
 21 | // FarmHash, by Geoff Pike
 22 | 
 23 | //
 24 | // http://code.google.com/p/farmhash/
 25 | //
 26 | // This file provides a few functions for hashing strings and other
 27 | // data.  All of them are high-quality functions in the sense that
 28 | // they do well on standard tests such as Austin Appleby's SMHasher.
 29 | // They're also fast.  FarmHash is the successor to CityHash.
 30 | //
 31 | // Functions in the FarmHash family are not suitable for cryptography.
 32 | //
 33 | // WARNING: This code has been only lightly tested on big-endian platforms!
 34 | // It is known to work well on little-endian platforms that have a small penalty
 35 | // for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
 36 | // It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
 37 | // bug reports are welcome.
 38 | //
 39 | // By the way, for some hash functions, given strings a and b, the hash
 40 | // of a+b is easily derived from the hashes of a and b.  This property
 41 | // doesn't hold for any hash functions in this file.
 42 | 
 43 | #ifndef FARM_HASH_H_
 44 | #define FARM_HASH_H_
 45 | 
 46 | #include <assert.h>
 47 | #include <stdint.h>
 48 | #include <stdlib.h>
 49 | #include <string.h>   // for memcpy and memset
 50 | #include <utility>
 51 | 
 52 | #ifndef NAMESPACE_FOR_HASH_FUNCTIONS
 53 | #define NAMESPACE_FOR_HASH_FUNCTIONS util
 54 | #endif
 55 | 
 56 | namespace NAMESPACE_FOR_HASH_FUNCTIONS {
 57 | 
 58 | #if defined(FARMHASH_UINT128_T_DEFINED)
 59 | #if defined(__clang__)
 60 | #if !defined(uint128_t)
 61 | #define uint128_t __uint128_t
 62 | #endif
 63 | #endif
 64 | inline uint64_t Uint128Low64(const uint128_t x) {
 65 |   return static_cast<uint64_t>(x);
 66 | }
 67 | inline uint64_t Uint128High64(const uint128_t x) {
 68 |   return static_cast<uint64_t>(x >> 64);
 69 | }
 70 | inline uint128_t Uint128(uint64_t lo, uint64_t hi) {
 71 |   return lo + (((uint128_t)hi) << 64);
 72 | }
 73 | #else
 74 | typedef std::pair<uint64_t, uint64_t> uint128_t;
 75 | inline uint64_t Uint128Low64(const uint128_t x) { return x.first; }
 76 | inline uint64_t Uint128High64(const uint128_t x) { return x.second; }
 77 | inline uint128_t Uint128(uint64_t lo, uint64_t hi) { return uint128_t(lo, hi); }
 78 | #endif
 79 | 
 80 | 
// BASIC STRING HASHING
//
// Convention for every declaration in this section: `s` points to the first
// byte of the input and `len` is the number of bytes to hash (the string
// overloads further down pass s.data() and s.length()).

// Hash function for a byte array.
// May change from time to time, may differ on different platforms, may differ
// depending on NDEBUG.
size_t Hash(const char* s, size_t len);

// Hash function for a byte array.  Most useful in 32-bit binaries.
// May change from time to time, may differ on different platforms, may differ
// depending on NDEBUG.
uint32_t Hash32(const char* s, size_t len);

// Hash function for a byte array.  For convenience, a 32-bit seed is also
// hashed into the result.
// May change from time to time, may differ on different platforms, may differ
// depending on NDEBUG.
uint32_t Hash32WithSeed(const char* s, size_t len, uint32_t seed);

// Hash function for a byte array.
// May change from time to time, may differ on different platforms, may differ
// depending on NDEBUG.
uint64_t Hash64(const char* s, size_t len);

// Hash function for a byte array.  For convenience, a 64-bit seed is also
// hashed into the result.
// May change from time to time, may differ on different platforms, may differ
// depending on NDEBUG.
uint64_t Hash64WithSeed(const char* s, size_t len, uint64_t seed);

// Hash function for a byte array.  For convenience, two seeds are also
// hashed into the result.
// May change from time to time, may differ on different platforms, may differ
// depending on NDEBUG.
uint64_t Hash64WithSeeds(const char* s, size_t len,
                       uint64_t seed0, uint64_t seed1);

// Hash function for a byte array.
// May change from time to time, may differ on different platforms, may differ
// depending on NDEBUG.
uint128_t Hash128(const char* s, size_t len);

// Hash function for a byte array.  For convenience, a 128-bit seed is also
// hashed into the result.
// May change from time to time, may differ on different platforms, may differ
// depending on NDEBUG.
uint128_t Hash128WithSeed(const char* s, size_t len, uint128_t seed);
127 | 
128 | // BASIC NON-STRING HASHING
129 | 
130 | // Hash 128 input bits down to 64 bits of output.
131 | // This is intended to be a reasonably good hash function.
132 | // May change from time to time, may differ on different platforms, may differ
133 | // depending on NDEBUG.
134 | inline uint64_t Hash128to64(uint128_t x) {
135 |   // Murmur-inspired hashing.
136 |   const uint64_t kMul = 0x9ddfea08eb382d69ULL;
137 |   uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
138 |   a ^= (a >> 47);
139 |   uint64_t b = (Uint128High64(x) ^ a) * kMul;
140 |   b ^= (b >> 47);
141 |   b *= kMul;
142 |   return b;
143 | }
144 | 
// FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
//
// As with the hashing declarations, `s` points to the first byte of the
// input and `len` is the number of bytes to fingerprint.

// Fingerprint function for a byte array.  Most useful in 32-bit binaries.
uint32_t Fingerprint32(const char* s, size_t len);

// Fingerprint function for a byte array.
uint64_t Fingerprint64(const char* s, size_t len);

// Fingerprint function for a byte array.
uint128_t Fingerprint128(const char* s, size_t len);
155 | 
156 | // This is intended to be a good fingerprinting primitive.
157 | // See below for more overloads.
158 | inline uint64_t Fingerprint(uint128_t x) {
159 |   // Murmur-inspired hashing.
160 |   const uint64_t kMul = 0x9ddfea08eb382d69ULL;
161 |   uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
162 |   a ^= (a >> 47);
163 |   uint64_t b = (Uint128High64(x) ^ a) * kMul;
164 |   b ^= (b >> 44);
165 |   b *= kMul;
166 |   b ^= (b >> 41);
167 |   b *= kMul;
168 |   return b;
169 | }
170 | 
// Fingerprinting primitive for a 64-bit input; produces 64 bits.
inline uint64_t Fingerprint(uint64_t x) {
  // Murmur-inspired mixing: one multiply, then two xor-shift / multiply
  // rounds.
  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
  uint64_t mixed = x * kMul;
  mixed = (mixed ^ (mixed >> 44)) * kMul;
  mixed = (mixed ^ (mixed >> 41)) * kMul;
  return mixed;
}
182 | 
183 | #ifndef FARMHASH_NO_CXX_STRING
184 | 
185 | // Convenience functions to hash or fingerprint C++ strings.
186 | // These require that Str::data() return a pointer to the first char
187 | // (as a const char*) and that Str::length() return the string's length;
188 | // they work with std::string, for example.
189 | 
// String-object overload of Hash(): forwards s.data() and s.length() to the
// byte-array version.
// The result may change from time to time, may differ on different platforms,
// and may differ depending on NDEBUG.
template <typename Str>
inline size_t Hash(const Str& s) {
  // The byte-array API works on single-byte elements only.
  assert(sizeof(s[0]) == 1);
  const auto bytes = s.data();
  const auto byte_count = s.length();
  return Hash(bytes, byte_count);
}
198 | 
// String-object overload of Hash32(): forwards s.data() and s.length() to the
// byte-array version.  Most useful in 32-bit binaries.
// The result may change from time to time, may differ on different platforms,
// and may differ depending on NDEBUG.
template <typename Str>
inline uint32_t Hash32(const Str& s) {
  // The byte-array API works on single-byte elements only.
  assert(sizeof(s[0]) == 1);
  const auto bytes = s.data();
  const auto byte_count = s.length();
  return Hash32(bytes, byte_count);
}
207 | 
// String-object overload of Hash32WithSeed(): forwards s.data(), s.length()
// and the 32-bit seed to the byte-array version, which hashes the seed into
// the result.
// The result may change from time to time, may differ on different platforms,
// and may differ depending on NDEBUG.
template <typename Str>
inline uint32_t Hash32WithSeed(const Str& s, uint32_t seed) {
  // The byte-array API works on single-byte elements only.
  assert(sizeof(s[0]) == 1);
  const auto bytes = s.data();
  const auto byte_count = s.length();
  return Hash32WithSeed(bytes, byte_count, seed);
}
217 | 
// String-object overload of Hash64(): forwards s.data() and s.length() to the
// byte-array version.
// The result may change from time to time, may differ on different platforms,
// and may differ depending on NDEBUG.
template <typename Str>
inline uint64_t Hash64(const Str& s) {
  // The byte-array API works on single-byte elements only.
  assert(sizeof(s[0]) == 1);
  const auto bytes = s.data();
  const auto byte_count = s.length();
  return Hash64(bytes, byte_count);
}
227 | 
// String-object overload of Hash64WithSeed(): forwards s.data(), s.length()
// and the 64-bit seed to the byte-array version, which hashes the seed into
// the result.
// The result may change from time to time, may differ on different platforms,
// and may differ depending on NDEBUG.
template <typename Str>
inline uint64_t Hash64WithSeed(const Str& s, uint64_t seed) {
  // The byte-array API works on single-byte elements only.
  assert(sizeof(s[0]) == 1);
  const auto bytes = s.data();
  const auto byte_count = s.length();
  return Hash64WithSeed(bytes, byte_count, seed);
}
237 | 
// String-object overload of Hash64WithSeeds(): forwards s.data(), s.length()
// and both seeds to the byte-array version, which hashes the two seeds into
// the result.
// The result may change from time to time, may differ on different platforms,
// and may differ depending on NDEBUG.
template <typename Str>
inline uint64_t Hash64WithSeeds(const Str& s, uint64_t seed0, uint64_t seed1) {
  // The byte-array API works on single-byte elements only.
  assert(sizeof(s[0]) == 1);
  const auto bytes = s.data();
  const auto byte_count = s.length();
  return Hash64WithSeeds(bytes, byte_count, seed0, seed1);
}
247 | 
248 | // Hash function for a byte array.
249 | // May change from time to time, may differ on different platforms, may differ
250 | // depending on NDEBUG.
251 | template <typename Str>
252 | inline uint128_t Hash128(const Str& s) {
253 |   assert(sizeof(s[0]) == 1);
254 |   return Hash128(s.data(), s.length());
255 | }
256 | 
257 | // Hash function for a byte array.  For convenience, a 128-bit seed is also
258 | // hashed into the result.
259 | // May change from time to time, may differ on different platforms, may differ
260 | // depending on NDEBUG.
261 | template <typename Str>
262 | inline uint128_t Hash128WithSeed(const Str& s, uint128_t seed) {
263 |   assert(sizeof(s[0]) == 1);
264 |   return Hash128(s.data(), s.length(), seed);
265 | }
266 | 
267 | // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
268 | 
// String-object overload of Fingerprint32(): forwards s.data() and s.length()
// to the byte-array version.  Most useful in 32-bit binaries.
template <typename Str>
inline uint32_t Fingerprint32(const Str& s) {
  // The byte-array API works on single-byte elements only.
  assert(sizeof(s[0]) == 1);
  const auto bytes = s.data();
  const auto byte_count = s.length();
  return Fingerprint32(bytes, byte_count);
}
275 | 
// String-object overload of Fingerprint64(): forwards s.data() and s.length()
// to the byte-array version.
template <typename Str>
inline uint64_t Fingerprint64(const Str& s) {
  // The byte-array API works on single-byte elements only.
  assert(sizeof(s[0]) == 1);
  const auto bytes = s.data();
  const auto byte_count = s.length();
  return Fingerprint64(bytes, byte_count);
}
283 | 
284 | // Fingerprint function for a byte array.
285 | template <typename Str>
286 | inline uint128_t Fingerprint128(const Str& s) {
287 |   assert(sizeof(s[0]) == 1);
288 |   return Fingerprint128(s.data(), s.length());
289 | }
290 | 
291 | #endif
292 | 
293 | }  // namespace NAMESPACE_FOR_HASH_FUNCTIONS
294 | 
/* Gently define FARMHASH_BIG_ENDIAN when a big-endian machine is detected.
 *
 * Detection order:
 *   1. __BIG_ENDIAN__ / __LITTLE_ENDIAN__ compiler macros;
 *   2. the compiler-provided __BYTE_ORDER__ macro (note the trailing
 *      underscores: __BYTE_ORDER without them is a libc macro that is only
 *      defined after <endian.h> has been included);
 *   3. <endian.h> or <sys/endian.h>, depending on the platform;
 *   4. Windows, which is treated as little-endian.
 * Anything else is a hard error.
 */
#if defined(__BIG_ENDIAN__)
  #if !defined(FARMHASH_BIG_ENDIAN)
    #define FARMHASH_BIG_ENDIAN
  #endif
#elif defined(__LITTLE_ENDIAN__)
  // nothing for little-endian
#elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
  // nothing for little-endian
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
  #if !defined(FARMHASH_BIG_ENDIAN)
    #define FARMHASH_BIG_ENDIAN
  #endif
#elif defined(__linux__) || defined(__CYGWIN__) || (defined(__GNUC__) && !defined(_WIN32)) || defined(__GNU_LIBRARY__)
  #include <endian.h> // libc6-dev, GLIBC
  #if BYTE_ORDER == BIG_ENDIAN
    #if !defined(FARMHASH_BIG_ENDIAN)
      #define FARMHASH_BIG_ENDIAN
    #endif
  #endif
#elif defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__s390x__)
  #include <sys/endian.h>
  #if BYTE_ORDER == BIG_ENDIAN
    #if !defined(FARMHASH_BIG_ENDIAN)
      #define FARMHASH_BIG_ENDIAN
    #endif
  #endif
#elif defined(_WIN32)
  // Windows is (currently) little-endian
#else
  #error "Unable to determine endianness!"
#endif /* __BIG_ENDIAN__ */
327 | 
328 | #endif  // FARM_HASH_H_
329 | 


--------------------------------------------------------------------------------
/scripts/global_keyuser_interpreter.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
# This script reads multiple .csv files produced by keyuser, extracts the
# relevant information, and calculates metrics to compare the different hash
# functions.
  5 | 
  6 | import os
  7 | import argparse
  8 | import importlib
  9 | import re
 10 | 
 11 | # Data science boys
 12 | import numpy as np
 13 | import pandas as pd
 14 | import matplotlib.pyplot as plt
 15 | from scipy import stats
 16 | 
 17 | def distribution_test(key, value):
 18 | 
 19 |     n = len(value)
 20 |     chi_worst_case = ((n-1)**2) + (n-1)
 21 | 
 22 |     # Calculate the observed frequencies of each value
 23 |     observed_freq, _ = np.histogram(value, bins=len(np.unique(value)))
 24 | 
 25 |     # Calculate the expected frequencies for a uniform distribution
 26 |     expected_freq = np.full_like(observed_freq, len(value) / len(np.unique(value)))
 27 | 
 28 |     # Perform the Chi-Square Goodness of Fit Test
 29 |     try:
 30 |         chi2, p = stats.chisquare(observed_freq, expected_freq)
 31 |     except Exception:
 32 |         p = 0.0
 33 |         chi2 = chi_worst_case
 34 | 
 35 |     skewness = stats.skew(value)
 36 | 
 37 |     return pd.DataFrame(pd.DataFrame({"Hash Function": [key], "Skewness": [skewness], "Chi-Test": [chi2], "Uniform?": [p > 0.05]}))
 38 |     
 39 | def handle_distribution_analysis(args):
 40 |     try:
 41 |         arrays = {}
 42 |         for file in args.distribution:
 43 |             abs_path = os.path.abspath(file)
 44 |             spec = importlib.util.spec_from_file_location(os.path.basename(file.replace(".py","")),abs_path)
 45 |             module = importlib.util.module_from_spec(spec)
 46 |             spec.loader.exec_module(module)
 47 |             arrays[file] = module
 48 |     except ImportError:
 49 |         print(args.distribution, "is an invalid import! Please make sure that the module exists and is in the same directory as this script.")
 50 |         print("If it does not exist, you can obtain it by running keyuser with --test-distribution or bench-runner with --histogram ", args.distribution)
 51 |         exit(1)
 52 | 
 53 |     result_array = None
 54 |     for module_name, current_module in arrays.items():
 55 |         regex_name = os.path.basename(module_name).replace(".py", "")
 56 | 
 57 |         if args.hash_functions is None:
 58 |             args.hash_functions = []
 59 |             for key, _ in current_module.distributions.items():
 60 |                 args.hash_functions.append(key)
 61 | 
 62 |         result = None
 63 |         if args.plot_graph:
 64 |             file_destination = args.output_destination + regex_name + "_histogram.svg"
 65 |             _, ax = plt.subplots(figsize=(10, 5))
 66 |         
 67 |         for key, value in current_module.distributions.items():
 68 |             key = key.replace("array_", "")
 69 | 
 70 |             if result is None:
 71 |                 result = distribution_test(key, value)
 72 |             else:
 73 |                 result = pd.concat([result, distribution_test(key, value)], ignore_index=True)
 74 | 
 75 |         print(result)
 76 | 
 77 |         # Normalize "Chi-Test" column using the "STDHashSrc" as the reference
 78 |         result["Chi-Test"] = result["Chi-Test"] / result[result["Hash Function"] == "STDHashSrc"]["Chi-Test"].values[0]
 79 | 
 80 |             # Remove all entries from the dataframe that contain Simd Keyword
 81 |         result = result[~result['Hash Function'].str.contains("Simd")]
 82 |         result = result[~result['Hash Function'].str.contains("Murmur")]
 83 | 
 84 |         # Replace all instances of AbseilHash with Abseil
 85 |         result['Hash Function'] = [re.sub(r'AbseilHash.*', 'Abseil', x) for x in result['Hash Function']]
 86 |         result['Hash Function'] = [re.sub(r'FNVHash.*', 'FNV', x) for x in result['Hash Function']]
 87 |         result['Hash Function'] = [re.sub(r'CityHash.*', 'City', x) for x in result['Hash Function']]
 88 | 
 89 |         # Concatenate all entries on the dataframe HashFunction that start with the same Prefix
 90 |         result['Hash Function'] = [re.sub(r'Pext.*', 'Pext', x) for x in result['Hash Function']]
 91 |         result['Hash Function'] = [re.sub(r'OffXor.*', 'OffXor', x) for x in result['Hash Function']]
 92 |         result['Hash Function'] = [re.sub(r'Naive.*', 'Naive', x) for x in result['Hash Function']]
 93 |         result['Hash Function'] = [re.sub(r'Gperf.*', 'Gperf', x) for x in result['Hash Function']]
 94 |         result['Hash Function'] = [re.sub(r'Gpt.*', 'Gpt', x) for x in result['Hash Function']]
 95 |         result['Hash Function'] = [re.sub(r'STDHashSrc.*', 'STL', x) for x in result['Hash Function']]
 96 |         result['Hash Function'] = [re.sub(r'Aes.*', 'Aes', x) for x in result['Hash Function']]
 97 | 
 98 |         if result_array is None:
 99 |             result_array = result
100 |         else:
101 |             result_array = pd.concat([result_array, result], ignore_index=True)
102 | 
103 |     # Remove all entries from the dataframe that contain Simd Keyword
104 |     result_array = result_array.groupby("Hash Function")["Chi-Test"].sum().reset_index()
105 |     result_array["Chi-Test"] = result_array["Chi-Test"] / len(arrays)
106 | 
107 |     print("Below DataFrame from distribution file: ", args.distribution)
108 |     print(result_array)
109 |     output_path = args.output_destination + regex_name + "_chitest.csv"
110 |     print("See all results in: ", output_path)
111 |     result_array.to_csv(output_path, index=False)
112 | 
113 |     # if args.plot_graph:
114 |     #     ax.legend(loc='upper left', bbox_to_anchor=(1, 1))
115 |     #     plt.tight_layout()
116 |     #     plt.savefig(file_destination)
117 |     #     print("Histograms saved in: ", file_destination)
118 | 
119 | ########################
120 | # Performance Analysis #
121 | ########################
122 | 
def mannwhitneyu_from_dataframe(args, regex_name, df, column_to_group, column_to_compare):
    """Run a pairwise Mann-Whitney U test between all groups of a DataFrame.

    Every ordered pair of distinct groups is compared, so each unordered pair
    appears twice in the output. The table is printed and written to
    ``<output_destination><regex_name><column_to_compare>_mannwhitneyu.csv``,
    with spaces in ``column_to_compare`` replaced by underscores.

    Args:
        args: Namespace providing ``output_destination``.
        regex_name: Prefix used in the log message and the output file name.
        df: DataFrame holding the samples.
        column_to_group: Column whose values define the groups.
        column_to_compare: Column holding the values to compare.
    """
    grouped = df.groupby(column_to_group)

    # Extract every group's sample once instead of once per pair.
    samples = {name: grouped.get_group(name)[column_to_compare] for name in grouped.groups}

    # Significance threshold used to interpret the p-value.
    alpha = 0.05

    # Rows are collected in a list and turned into a DataFrame once; growing
    # a DataFrame with pd.concat inside the loop is quadratic and starts from
    # an empty frame, which pandas deprecates.
    rows = []
    for hashFunc1, sample1 in samples.items():
        for hashFunc2, sample2 in samples.items():
            if hashFunc1 == hashFunc2:
                continue

            _, p = stats.mannwhitneyu(sample1, sample2)
            rows.append({"Hash Function 1": hashFunc1, "Hash Function 2": hashFunc2, "p-value": p, "Same Distribution?": p > alpha})

    # Explicit columns keep the CSV header even when there is nothing to compare.
    result = pd.DataFrame(rows, columns=["Hash Function 1", "Hash Function 2", "p-value", "Same Distribution?"])

    print("Below DataFrame from Regex: ", regex_name)
    print(result)
    column_to_compare = column_to_compare.replace(" ", "_")
    output_path = args.output_destination + regex_name + column_to_compare + "_mannwhitneyu.csv"
    print("See all results in: ", output_path)
    result.to_csv(output_path, index=False)
155 | 
156 | 
def containers_boxplot(args, df):
    """Save a boxplot of execution time grouped by hash container.

    The plot is written to ``<output_destination>containers.pdf``. The four
    x-axis positions are relabelled with fixed short container names.

    Args:
        args: Namespace providing ``output_destination``.
        df: DataFrame with 'Execution Time (s)' and 'Hash Container' columns.
    """
    pdf_path = args.output_destination+'containers.pdf'
    tick_positions = [1, 2, 3, 4]
    tick_labels = ["U_Map", "UM_Map", "UM_Set", "U_Set"]

    plt.rcParams['font.size'] = 14
    df.boxplot(
        column='Execution Time (s)',
        by='Hash Container',
        rot=45,
        showmeans=True,
        showfliers=False,
    )

    # Keep only the y label; titles and the x label are blanked out.
    plt.ylabel('Execution Time (s)')
    plt.xlabel('')
    plt.xticks(tick_positions, tick_labels)
    plt.title('')
    plt.suptitle('')
    plt.tight_layout()
    plt.savefig(pdf_path)

    print("Container Boxplots saved in: ", pdf_path)
208 | 
209 | 
def performance_from_dataframe(args, df, regex_name):
    """Compute per-hash-function geometric means of time and collisions.

    Rows are grouped by benchmark configuration plus hash function; each
    group contributes its mean execution time and mean collision count. The
    geometric mean over a function's groups is then printed and written to
    ``<output_destination><regex_name>_geomean.csv``.

    Args:
        args: Namespace providing ``output_destination``.
        df: DataFrame with the benchmark columns used below.
        regex_name: Prefix used in the log message and the output file name.
    """
    grouped = df.groupby(['Execution Mode', 'Num Operations', 'Num Keys', 'Insertions (%)', 'Searches (%)', 'Eliminatons(%)', 'Hash Function'])

    # hash function name -> list of (mean time, mean collisions), one per group
    all_data = {}

    for group in grouped.groups:
        temp = grouped.get_group(group)[['Execution Time (s)', 'Collision Count']]

        # 'Hash Function' is the last grouping key.
        hash_func_name = group[-1]

        all_data.setdefault(hash_func_name, []).append(
            (temp['Execution Time (s)'].mean(), temp['Collision Count'].mean()))

    # Geometric mean of all_data. Rows are collected in a list and converted
    # once; growing a DataFrame with pd.concat inside the loop is quadratic
    # and starts from an empty frame, which pandas deprecates.
    rows = []
    for func_name, samples in all_data.items():
        samples_geotime = 1.0
        samples_collision = 1.0
        for mean_time, mean_collisions in samples:
            samples_geotime *= mean_time
            # Zero collision means are left out of the product but still
            # count towards the root taken below.
            if mean_collisions != 0:
                samples_collision *= mean_collisions
        samples_geotime = samples_geotime ** (1/len(samples))
        samples_collision = samples_collision ** (1/len(samples))
        rows.append({"Func Name": func_name, "GeoTime": samples_geotime, "GeoCollision": samples_collision})

    # Explicit columns keep the CSV header even when df has no rows.
    result = pd.DataFrame(rows, columns=["Func Name", "GeoTime", "GeoCollision"])

    print("Below DataFrame from Regex: ", regex_name)
    print(result)
    output_path = args.output_destination + regex_name + "_geomean.csv"
    print("See all results in: ", output_path)
    result.to_csv(output_path, index=False)
248 | 
def handle_performance_analysis(args):
    """Analyse the performance CSV files listed in ``args.performance``.

    All files are concatenated, "Simd" hash functions are dropped and the
    remaining names are collapsed to their family name. With
    ``args.hash_performance`` set, only the mean 'Elapsed Time (seconds)' per
    family, multiplied by 1000, is printed as ``name,value``. Otherwise the
    Mann-Whitney tables, the geometric means and two boxplot PDFs are
    produced (plus the container boxplot when ``args.rq6`` is set).

    Args:
        args: Parsed command-line namespace. Reads ``performance``,
            ``hash_performance``, ``rq6`` and ``output_destination``.
    """
    # Load CSV files into pandas dataframes and concatenate them
    dataframes = [pd.read_csv(file) for file in args.performance]

    regex_name = "global"

    df = pd.concat(dataframes, ignore_index=True)

    # Remove all entries that contain the Simd keyword. The copy keeps the
    # column assignments below from writing into a slice of the original.
    df = df[~df['Hash Function'].str.contains("Simd")].copy()

    # Ordered (pattern, replacement) pairs that collapse every variant of a
    # hash function family into a single display name.
    name_rewrites = (
        (r'AbseilHash.*', 'Abseil'),
        (r'FNVHash.*', 'FNV'),
        (r'CityHash.*', 'City'),
        (r'Pext.*', 'Pext'),
        (r'OffXor.*', 'OffXor'),
        (r'Naive.*', 'Naive'),
        (r'Gperf.*', 'Gperf'),
        (r'Gpt.*', 'Gpt'),
        (r'STDHashSrc.*', 'STL'),
        (r'Aes.*', 'Aes'),
    )
    for pattern, family in name_rewrites:
        df['Hash Function'] = [re.sub(pattern, family, x) for x in df['Hash Function']]

    if args.hash_performance:
        # Group by the column name itself rather than a one-element list, so
        # the group keys are plain strings on every pandas version.
        mean_seconds = df.groupby('Hash Function')['Elapsed Time (seconds)'].mean()
        for hashFunc, mean in mean_seconds.items():
            mean = mean * 1000
            print(f"{hashFunc},{mean:.4f}")
        return

    # Calculate the Mann-Whitney U test
    mannwhitneyu_from_dataframe(args, regex_name, df, 'Hash Function', 'Execution Time (s)')
    mannwhitneyu_from_dataframe(args, regex_name, df, 'Hash Function', 'Collision Count')

    if args.rq6:
        containers_boxplot(args, df)

    performance_from_dataframe(args, df, regex_name)

    # Collision count boxplot, one box per hash function family.
    plt.rcParams['font.size'] = 14
    df.boxplot(column='Collision Count', by='Hash Function', rot=45, showmeans=True, showfliers=False)
    plt.ylabel('Collision Count')
    plt.xlabel('')
    plt.title('')
    plt.suptitle('')
    plt.tight_layout()
    plt.savefig(args.output_destination+'global_collision_count.pdf')
    print("Collision Count Boxplots saved in: ", args.output_destination+'global_collision_count.pdf')

    # Execution time boxplot; Gperf is excluded from this figure.
    df = df[df['Hash Function'] != 'Gperf']
    df.boxplot(column='Execution Time (s)', by='Hash Function', rot=45, showmeans=True, showfliers=False)
    plt.ylabel('Execution Time (s)')
    plt.xlabel('')
    plt.title('')
    plt.suptitle('')
    plt.tight_layout()
    plt.savefig(args.output_destination+'global_performance.pdf')
    print("Execution Time Boxplots saved in: ", args.output_destination+'global_performance.pdf')
314 | 
def main():
    """Parse the command-line options and run the selected analysis.

    When -p/--performance lists at least one file, handle_performance_analysis
    is called. Otherwise, when -d/--distribution lists at least one file,
    handle_distribution_analysis is called. With neither option nothing runs.
    -fp/--full-print lifts pandas' display limits before the analysis starts.
    """
    parser = argparse.ArgumentParser(description="Keyuser Interpreter")
    parser.add_argument("-d", "--distribution", nargs='*', type=str, default="", help="Name of the distribution files to interpret. Ignored when -p is also given.")
    parser.add_argument("-p", "--performance", nargs='*', type=str, default="", help="Name of the csv performance files to interpret. Takes precedence over -d.")
    parser.add_argument("-hp", "--hash-performance", action='store_true', help="Only print the mean elapsed time per hash function for the files given with -p.")
    parser.add_argument("-rq6", action='store_true', help="Group performance by data structure.")
    parser.add_argument("-pg", "--plot-graph", action='store_true', help="Option to plot the results in graphs.")
    parser.add_argument("-od", "--output-destination", type=str, default="results/", help="Path prefix for the generated graphs. Default is 'results/'.")
    parser.add_argument("-fp", "--full-print", action='store_true', help="Print the entire dataframe.")
    parser.add_argument("-hf", "--hash-functions", nargs='*', type=str, help="Name of the hash functions to analyze.")

    args = parser.parse_args()

    if args.full_print:
        # None removes the corresponding pandas display limit.
        pd.set_option('display.max_rows', None)
        pd.set_option('display.max_columns', None)
        pd.set_option('display.width', None)
        pd.set_option('display.max_colwidth', None)

    # An option given without file names yields an empty list, which is falsy.
    if args.performance:
        handle_performance_analysis(args)
    elif args.distribution:
        handle_distribution_analysis(args)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
340 | 


--------------------------------------------------------------------------------
/src/keyuser/src/benchmarks.hpp:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * @file benchmarks.hpp
  3 |  * @brief This file contains functions to execute and manage hash function benchmarks.
  4 |  */
  5 | 
  6 | #ifndef BENCHMARKS_HPP
  7 | #define BENCHMARKS_HPP
  8 | 
#include <cstddef>
#include <functional>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
 13 | 
 14 | /**
 15 |  * @brief Base class for benchmarking.
 16 |  */
 17 | class Benchmark{
 18 | 
 19 |     const std::string containerName; ///< The name of the container used in the benchmark.
 20 |     const std::string hashName; ///< The name of the hash function used in the benchmark.
 21 | 
 22 |     protected:
 23 | 
 24 |         /**
 25 |          * @brief Calculates the number of collisions in the buckets of a given unordered container.
 26 |          * 
 27 |          * @tparam UnorderedContainer The type of the unordered container.
 28 |          * @param container The unordered container to calculate collisions for.
 29 |          * @return The number of collisions.
 30 |          */
 31 |         template <typename UnorderedContainer>
 32 |         int internalcalculateCollisionCountBuckets(const UnorderedContainer& container) {
 33 |             // STD Containers may have multiple keys inside the same bucket, even though they have different hashes :(
 34 |             int colcount = 0;
 35 |             int empty = 0;
 36 |             for (size_t bucket = 0; bucket < container.bucket_count(); ++bucket) {
 37 |                 if (container.bucket_size(bucket) > 1) {
 38 |                     colcount += container.bucket_size(bucket) - 1;
 39 |                 } else {
 40 |                     ++empty;
 41 |                 }
 42 |             }
 43 |             return colcount;
 44 |         }
 45 | 
 46 |     public:
 47 |         /**
 48 |          * @brief Construct a new Benchmark object.
 49 |          * 
 50 |          * @param _containerName The name of the container used in the benchmark.
 51 |          * @param _hashName The name of the hash function used in the benchmark.
 52 |          */
 53 |         Benchmark(const std::string& _containerName, const std::string& _hashName) : 
 54 |             containerName(_containerName),
 55 |             hashName(_hashName)
 56 |             {}
 57 | 
 58 |         /**
 59 |          * @brief Get the name of the container used in the benchmark.
 60 |          * 
 61 |          * @return The name of the container.
 62 |          */
 63 |         std::string getContainerName(){ return containerName; }
 64 | 
 65 |         /**
 66 |          * @brief Get the name of the hash function used in the benchmark.
 67 |          * 
 68 |          * @return The name of the hash function.
 69 |          */
 70 |         std::string getHashName(){ return hashName; }
 71 | 
 72 |         /**
 73 |          * @brief Virtual destructor.
 74 |          */
 75 |         virtual ~Benchmark() {}
 76 | 
 77 |         /**
 78 |          * @brief Insert a key into the container. Must be implemented by derived classes.
 79 |          * 
 80 |          * @param key The key to insert.
 81 |          */
 82 |         virtual void insert(const std::string& key) = 0;
 83 | 
 84 |         /**
 85 |          * @brief Search for a key in the container. Must be implemented by derived classes.
 86 |          * 
 87 |          * @param key The key to search for.
 88 |          * @return true If the key is found.
 89 |          * @return false Otherwise.
 90 |          */
 91 |         virtual bool search(const std::string& key) = 0;
 92 | 
 93 |         /**
 94 |          * @brief Remove a key from the container. Must be implemented by derived classes.
 95 |          * 
 96 |          * @param key The key to remove.
 97 |          */
 98 |         virtual void elimination(const std::string& key) = 0;
 99 | 
100 |         /**
101 |          * @brief Calculate the number of collision buckets in the container. Must be implemented by derived classes.
102 |          * 
103 |          * @return The number of collision buckets.
104 |          */
105 |         virtual int calculateCollisionCountBuckets(void) = 0;
106 | 
107 |         /**
108 |          * @brief Clear the container benchmark between executions. Must be implemented by derived classes.
109 |          */
110 |         virtual void clearContainer(void) = 0;
111 | 
112 |         /**
113 |          * @brief Get the hash function used by the container. Must be implemented by derived classes.
114 |          * 
115 |          * @return The hash function.
116 |          */
117 |         virtual std::function<std::size_t(const std::string&)> getHashFunction(void) = 0;
118 | };
119 | 
120 | /**
121 |  * @brief A benchmarking class for unordered map with a custom hash function for std::string.
122 |  * 
123 |  * @tparam HashFuncT The type of the hash function.
124 |  */
125 | template <typename HashFuncT>
126 | class UnorderedMapBench : public Benchmark{
127 |     std::unordered_map<std::string, int, HashFuncT> map; ///< The unordered map used for benchmarking.
128 |     HashFuncT hashFunctor; ///< The hash function object.
129 | 
130 |     public:
131 |         /**
132 |          * @brief Construct a new Unordered Map Bench object.
133 |          * 
134 |          * @param _name The name of the benchmark.
135 |          * @param _hashName The name of the hash function.
136 |          */
137 |         UnorderedMapBench(std::string _name, std::string _hashName) : 
138 |             Benchmark(_name, _hashName)
139 |             {}
140 | 
141 |         /**
142 |          * @brief Insert a key into the unordered map.
143 |          * 
144 |          * @param key The key to insert.
145 |          */
146 |         void insert(const std::string& key) override {
147 |             map[key] = 0;
148 |         }
149 | 
150 |         /**
151 |          * @brief Search for a key in the unordered map.
152 |          * 
153 |          * @param key The key to search for.
154 |          * @return true If the key is found.
155 |          * @return false Otherwise.
156 |          */
157 |         bool search(const std::string& key) override {
158 |             return map.find(key) != map.end();
159 |         }
160 | 
161 |         /**
162 |          * @brief Remove a key from the unordered map.
163 |          * 
164 |          * @param key The key to remove.
165 |          */
166 |         void elimination(const std::string& key) override {
167 |             map.erase(key);
168 |         }
169 | 
170 |         /**
171 |          * @brief Calculate the number of collision buckets in the unordered map.
172 |          * 
173 |          * @return int The number of collision buckets.
174 |          */
175 |         int calculateCollisionCountBuckets(void) override {
176 |             return internalcalculateCollisionCountBuckets(map);
177 |         }
178 | 
179 |         /**
180 |          * @brief Clear the container benchmark between executions.
181 |          */
182 |         void clearContainer(void) override{
183 |             map.clear();
184 |         }
185 | 
186 |         /**
187 |          * @brief Get the hash function used by the unordered map.
188 |          * 
189 |          * @return std::function<std::size_t(const std::string&)> The hash function.
190 |          */
191 |         std::function<std::size_t(const std::string&)> getHashFunction(void) override {
192 |             return map.hash_function();
193 |         }
194 | };
195 | 
196 | /**
197 |  * @brief A benchmarking class for unordered multimap with a custom hash function for std::string.
198 |  * 
199 |  * @tparam HashFuncT The type of the hash function.
200 |  */
201 | template <typename HashFuncT>
202 | class UnorderedMultiMapBench : public Benchmark{
203 |     std::unordered_multimap<std::string, int, HashFuncT> mmap; ///< The unordered multimap used for benchmarking.
204 | 
205 |     public:
206 |         /**
207 |          * @brief Construct a new Unordered Multi Map Bench object.
208 |          * 
209 |          * @param _name The name of the benchmark.
210 |          * @param _hashName The name of the hash function.
211 |          */
212 |         UnorderedMultiMapBench(std::string _name, std::string _hashName) : 
213 |             Benchmark(_name, _hashName)
214 |             {}
215 |             
216 |         /**
217 |          * @brief Insert a key into the unordered multimap.
218 |          * 
219 |          * @param key The key to insert.
220 |          */
221 |         void insert(const std::string& key) override {
222 |             mmap.insert(std::make_pair(key, 0));
223 |         }
224 | 
225 |         /**
226 |          * @brief Search for a key in the unordered multimap.
227 |          * 
228 |          * @param key The key to search for.
229 |          * @return true If the key is found.
230 |          * @return false Otherwise.
231 |          */
232 |         bool search(const std::string& key) override {
233 |             return mmap.find(key) != mmap.end();
234 |         }
235 | 
236 |         /**
237 |          * @brief Remove a key from the unordered multimap.
238 |          * 
239 |          * @param key The key to remove.
240 |          */
241 |         void elimination(const std::string& key) override {
242 |             mmap.erase(key);
243 |         }
244 | 
245 |         /**
246 |          * @brief Calculate the number of collision buckets in the unordered multimap.
247 |          * 
248 |          * @return int The number of collision buckets.
249 |          */
250 |         int calculateCollisionCountBuckets(void) override {
251 |             return internalcalculateCollisionCountBuckets(mmap);
252 |         }
253 | 
254 |         /**
255 |          * @brief Clear the container benchmark between executions.
256 |          */
257 |         void clearContainer(void) override{
258 |             mmap.clear();
259 |         }
260 |         
261 |         /**
262 |          * @brief Get the hash function used by the unordered multimap.
263 |          * 
264 |          * @return std::function<std::size_t(const std::string&)> The hash function.
265 |          */
266 |         std::function<std::size_t(const std::string&)> getHashFunction(void) override {
267 |             return mmap.hash_function();
268 |         }
269 | };
270 | 
271 | /**
272 |  * @brief A benchmarking class for unordered set with a custom hash function for std::string.
273 |  * 
274 |  * @tparam HashFuncT The type of the hash function.
275 |  */
276 | template <typename HashFuncT>
277 | class UnorderedSetBench : public Benchmark{
278 |     std::unordered_set<std::string, HashFuncT> set; ///< The unordered set used for benchmarking.
279 | 
280 |     public:
281 |         /**
282 |          * @brief Construct a new Unordered Set Bench object.
283 |          * 
284 |          * @param _name The name of the benchmark.
285 |          * @param _hashName The name of the hash function.
286 |          */
287 |         UnorderedSetBench(std::string _name, std::string _hashName) : 
288 |             Benchmark(_name, _hashName)
289 |             {}
290 | 
291 |         /**
292 |          * @brief Insert a key into the unordered set.
293 |          * 
294 |          * @param key The key to insert.
295 |          */
296 |         void insert(const std::string& key) override {
297 |             set.insert(key);
298 |         }
299 | 
300 |         /**
301 |          * @brief Search for a key in the unordered set.
302 |          * 
303 |          * @param key The key to search for.
304 |          * @return true If the key is found.
305 |          * @return false Otherwise.
306 |          */
307 |         bool search(const std::string& key) override {
308 |             return set.find(key) != set.end();
309 |         }
310 | 
311 |         /**
312 |          * @brief Remove a key from the unordered set.
313 |          * 
314 |          * @param key The key to remove.
315 |          */
316 |         void elimination(const std::string& key) override {
317 |             set.erase(key);
318 |         }
319 | 
320 |         /**
321 |          * @brief Calculate the number of collision buckets in the unordered set.
322 |          * 
323 |          * @return int The number of collision buckets.
324 |          */
325 |         int calculateCollisionCountBuckets(void) override {
326 |             return internalcalculateCollisionCountBuckets(set);
327 |         }
328 | 
329 |         /**
330 |          * @brief Clear the container benchmark between executions.
331 |          */
332 |         void clearContainer(void) override{
333 |             set.clear();
334 |         }
335 | 
336 |         /**
337 |          * @brief Get the hash function used by the unordered set.
338 |          * 
339 |          * @return std::function<std::size_t(const std::string&)> The hash function.
340 |          */
341 |         std::function<std::size_t(const std::string&)> getHashFunction(void) override {
342 |             return set.hash_function();
343 |         }
344 | };
345 | 
346 | /**
347 |  * @brief A benchmarking class for unordered multiset with a custom hash function for std::string.
348 |  * 
349 |  * @tparam HashFuncT The type of the hash function.
350 |  */
351 | template <typename HashFuncT>
352 | class UnorderedMultisetBench : public Benchmark{
353 |     std::unordered_multiset<std::string, HashFuncT> mset; ///< The unordered multiset used for benchmarking.
354 | 
355 |     public:
356 |         /**
357 |          * @brief Construct a new Unordered Multiset Bench object.
358 |          * 
359 |          * @param _name The name of the benchmark.
360 |          * @param _hashName The name of the hash function.
361 |          */
362 |         UnorderedMultisetBench(std::string _name, std::string _hashName) : 
363 |             Benchmark(_name, _hashName)
364 |             {}
365 | 
366 |         /**
367 |          * @brief Insert a key into the unordered multiset.
368 |          * 
369 |          * @param key The key to insert.
370 |          */
371 |         void insert(const std::string& key) override {
372 |             mset.insert(key);
373 |         }
374 | 
375 |         /**
376 |          * @brief Search for a key in the unordered multiset.
377 |          * 
378 |          * @param key The key to search for.
379 |          * @return true If the key is found.
380 |          * @return false Otherwise.
381 |          */
382 |         bool search(const std::string& key) override {
383 |             return mset.find(key) != mset.end();
384 |         }
385 | 
386 |         /**
387 |          * @brief Remove a key from the unordered multiset.
388 |          * 
389 |          * @param key The key to remove.
390 |          */
391 |         void elimination(const std::string& key) override {
392 |             mset.erase(key);
393 |         }
394 | 
395 |         /**
396 |          * @brief Calculate the number of collision buckets in the unordered multiset.
397 |          * 
398 |          * @return int The number of collision buckets.
399 |          */
400 |         int calculateCollisionCountBuckets(void) override {
401 |             return internalcalculateCollisionCountBuckets(mset);
402 |         }
403 |         
404 |         /**
405 |          * @brief Clear the container benchmark between executions.
406 |          */
407 |         void clearContainer(void) override{
408 |             mset.clear();
409 |         }
410 | 
411 |         /**
412 |          * @brief Get the hash function used by the unordered multiset.
413 |          * 
414 |          * @return std::function<std::size_t(const std::string&)> The hash function.
415 |          */
416 |         std::function<std::size_t(const std::string&)> getHashFunction(void) override {
417 |             return mset.hash_function();
418 |         }
419 | };
420 | 
/**
 * @brief Bundle of settings that configure a benchmark run.
 */
struct BenchmarkParameters{
    std::vector<std::string> hashesToRun{}; ///< Vector of hash functions to run.
    int insert{-1};                 ///< Number of insert operations.
    int search{-1};                 ///< Number of search operations.
    int elimination{-1};            ///< Number of elimination operations.
    int numOperations{-1};          ///< Total number of operations.
    int seed{223554};               ///< Seed for random number generation. Chosen by a fair dice roll.
    int repetitions{1};             ///< Number of repetitions for each benchmark.
    bool verbose{false};            ///< Verbose output flag.
    bool testDistribution{false};   ///< Flag to test distribution.
    bool hashPerformance{false};    ///< Flag to test hash performance.
    std::string distribution{"normal"}; ///< Distribution to use for testing.
};
437 | 
438 | /**
439 |  * @brief Execute a benchmark with interweaved operations with 50% insertions warm-up.
440 |  * 
441 |  * @param bench The benchmark to execute.
442 |  * @param keys The keys to use in the benchmark.
443 |  * @param args The parameters for the benchmark.
444 |  */
445 | void executeInterweaved(Benchmark* bench,
446 |                         const std::vector<std::string>& keys, 
447 |                         const BenchmarkParameters& args);
448 | 
449 | /**
450 |  * @brief Execute a benchmark with batched operations.
451 |  * 
452 |  * @param bench The benchmark to execute.
453 |  * @param keys The keys to use in the benchmark.
454 |  * @param args The parameters for the benchmark.
455 |  */
456 | void executeBatched(Benchmark* bench, 
457 |                     const std::vector<std::string>& keys,
458 |                     const BenchmarkParameters& args);
459 | 
460 | /**
461 |  * @brief Execute a set of benchmarks and prints to standard output the performance and collision results in csv format.
462 |  * 
463 |  * @param benchmarks The benchmarks to execute.
464 |  * @param keys The keys to use in the benchmarks.
465 |  * @param args The parameters for the benchmarks.
466 |  */
467 | void benchmarkExecutor(const std::vector<Benchmark*>& benchmarks, 
468 |                        const std::vector<std::string>& keys, 
469 |                        const BenchmarkParameters& args);
470 | 
471 | /**
472 |  * @brief Tests the distribution of benchmarks. 
473 |  * 
474 |  * This function takes a vector of benchmarks and a vector of keys. It tests the distribution of the benchmarks
475 |  * according to the keys provided.
476 |  * 
477 |  * Prints to standard output a python numpy array containing all sorted hashed values.
478 |  *
479 |  * @param benchmarks A vector of pointers to Benchmark objects to be tested.
480 |  * @param keys A vector of keys according to which the benchmarks are to be distributed.
481 |  */
482 | void testDistribution(const std::vector<Benchmark*>& benchmarks, 
483 |                        const std::vector<std::string>& keys);
484 | 
485 | /**
486 |  * @brief Tests the performance of a hash function.
487 |  * 
488 |  * This function takes a vector of keys and benchmark parameters as input, 
489 |  * and prints to standard output the elapsed time in seconds to hash all informed keys.
490 |  *
491 |  * @param keys A constant reference to a vector of strings that represent the keys to be hashed.
492 |  * @param args A constant reference to the BenchmarkParameters object that contains the parameters for the benchmark.
493 |  */
494 | void testHashPerformance(const std::vector<Benchmark*>& benchmarks, 
495 |                          const std::vector<std::string>& keys,
496 |                          const BenchmarkParameters& args);
497 | 
498 | /**
499 |  * @brief Frees the memory allocated for the benchmarks.
500 |  * 
501 |  * This function takes a vector of benchmarks and deallocates the memory used by these benchmarks.
502 |  *
503 |  * @param benchmarks A vector of pointers to Benchmark objects to be freed.
504 |  */
505 | void freeBenchmarks(std::vector<Benchmark*>& benchmarks);
506 | 
507 | #endif
508 | 


--------------------------------------------------------------------------------