├── Makefile ├── README.md ├── lits ├── hot_src │ ├── include │ │ ├── CStringComparator.hpp │ │ ├── PartialKeyConversionInformation.hpp │ │ ├── IdentityKeyExtractor.hpp │ │ ├── NodeAllocationInformation.hpp │ │ ├── KeyComparator.hpp │ │ ├── EntriesRange.hpp │ │ ├── ContentEquals.hpp │ │ ├── BiNodeInterface.hpp │ │ ├── SearchResultForInsert.hpp │ │ ├── BiNode.hpp │ │ ├── TwoEntriesNode.hpp │ │ ├── BiNodeInformation.hpp │ │ ├── PartialKeyMappingBase.hpp │ │ ├── PartialKeyMappingHelpers.hpp │ │ ├── OptionalValue.hpp │ │ ├── InsertInformation.hpp │ │ ├── TidConverters.hpp │ │ ├── NodeAllocationInformations.hpp │ │ ├── NodeType.hpp │ │ ├── DiscriminativeBit.hpp │ │ ├── KeyUtilities.hpp │ │ ├── NodeParametersMapping.hpp │ │ ├── Algorithms.hpp │ │ ├── NodeMergeInformation.hpp │ │ ├── BitMask32.hpp │ │ ├── SingleMaskPartialKeyMappingInterface.hpp │ │ ├── SIMDHelper.hpp │ │ ├── MultiMaskPartialKeyMappingInterface.hpp │ │ └── SingleMaskPartialKeyMapping.hpp │ ├── HOTSingleThreadedInsertStackEntry.hpp │ ├── HOTSingleThreadedNodeBase.hpp │ ├── MemoryPool.hpp │ ├── HOTSingleThreadedDeletionInformation.hpp │ ├── HOTSingleThreadedNodeBaseInterface.hpp │ ├── HOTSingleThreadedIterator.hpp │ ├── HOTSingleThreadedChildPointerInterface.hpp │ └── HOTSingleThreadedChildPointer.hpp ├── lits_entry.hpp ├── lits_base.hpp ├── lits_utils.hpp ├── lits_hot.hpp ├── lits_model.hpp ├── lits_kv.hpp ├── lits.hpp └── lits_gpkl.hpp ├── genId.hpp ├── example.cpp └── testbench.cpp /Makefile: -------------------------------------------------------------------------------- 1 | CXX = g++ 2 | CXXFLAGS = -std=c++14 -march=native -w -g -O3 3 | 4 | all: example testbench 5 | 6 | example: example.cpp 7 | $(CXX) $(CXXFLAGS) $< -o $@ 8 | 9 | testbench: testbench.cpp 10 | $(CXX) $(CXXFLAGS) $< -o $@ 11 | 12 | .PHONY: clean 13 | clean: 14 | rm -f example testbench 15 | -------------------------------------------------------------------------------- /README.md: 
/**
 * Ordering functor for NUL-terminated C strings.
 *
 * Yields true exactly when strcmp reports that `first` sorts before `second`.
 */
class CStringComparator {
public:
	inline bool operator()(const char* first, const char* second) const {
		const int order = strcmp(first, second);
		return order < 0;
	}
};
/**
 * A trivial key extractor: the value itself is returned as the extracted key.
 *
 * @tparam ValueType the type of the stored values; it is also exposed as KeyType
 */
template<typename ValueType>
struct IdentityKeyExtractor {
	typedef ValueType KeyType;

	/**
	 * @param value the value to extract the key from
	 * @return a copy of value
	 */
	inline KeyType operator()(ValueType const &value) const {
		return value;
	}
};
__IDX__CONTENTHELPERS__KEY_COMPARATOR__HPP__ 3 | 4 | /** @author robert.binna@uibk.ac.at */ 5 | 6 | #include "CStringComparator.hpp" 7 | 8 | namespace idx { namespace contenthelpers { 9 | 10 | /** 11 | * Helper template which allows to derive a comparator function for a given KeyType 12 | * 13 | * @tparam the type of the key to compare 14 | */ 15 | template struct KeyComparator { 16 | using type = std::less; 17 | }; 18 | 19 | template<> struct KeyComparator { 20 | using type = idx::contenthelpers::CStringComparator; 21 | }; 22 | 23 | } } 24 | 25 | #endif -------------------------------------------------------------------------------- /lits/hot_src/include/EntriesRange.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __HOT__COMMONS__ENTRIES_RANGE__ 2 | #define __HOT__COMMONS__ENTRIES_RANGE__ 3 | 4 | #include 5 | 6 | namespace hot { namespace commons { 7 | 8 | /** 9 | * Describes a range of entries by its start index and the number of entries contained. 
/**
 * A contiguous run of entries, identified by the index of its first entry
 * and by the number of entries it contains.
 */
struct EntriesRange {
	uint32_t const mFirstIndexInRange;
	uint32_t const mNumberEntriesInRange;

	EntriesRange(uint32_t firstIndexInRange, uint32_t numberEntriesInRange)
		: mFirstIndexInRange(firstIndexInRange), mNumberEntriesInRange(numberEntriesInRange)
	{
	}

	/**
	 * @return the index of the final entry of the range (first index + count - 1, in unsigned arithmetic)
	 */
	uint32_t inline getLastIndexInRange() const {
		uint32_t const onePastLastIndex = mFirstIndexInRange + mNumberEntriesInRange;
		return onePastLastIndex - 1;
	}
};
/**
 * Checks value equality for two given values using operator==.
 * Template specializations adapt the behaviour for specific types.
 *
 * @tparam Value the type of both values
 */
template<typename Value> __attribute__((always_inline)) inline bool contentEquals(Value value1, Value value2) {
	return value1 == value2;
}

/**
 * Specialization for c-strings: compares the characters with strcmp
 * instead of comparing the two pointers.
 */
template<> __attribute__((always_inline)) inline bool contentEquals<char const*>(char const* value1, char const* value2) {
	return strcmp(value1, value2) == 0;
}
/**
 * Plain record holding additional result information of a search for insert:
 *  - the index of the returned entry
 *  - the most significant bit index of the containing node
 */
struct SearchResultForInsert {
	uint32_t mEntryIndex;
	uint16_t mMostSignificantBitIndex;

	/** Leaves both fields unset; they are expected to be filled in through init(). */
	inline SearchResultForInsert() {
	}

	/** Creates a result with both fields set. */
	inline SearchResultForInsert(uint32_t entryIndex, uint16_t mostSignificantBitIndex) {
		init(entryIndex, mostSignificantBitIndex);
	}

	/** Overwrites both fields of an existing result. */
	inline void init(uint32_t entryIndex, uint16_t mostSignificantBitIndex) {
		mMostSignificantBitIndex = mostSignificantBitIndex;
		mEntryIndex = entryIndex;
	}
};
BiNode { discriminativeBit.mAbsoluteBitIndex, newHeight, existingNode, newEntry } 17 | : BiNode { discriminativeBit.mAbsoluteBitIndex, newHeight, newEntry, existingNode }; 18 | } 19 | 20 | }} 21 | 22 | #endif -------------------------------------------------------------------------------- /lits/hot_src/include/TwoEntriesNode.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __HOT__COMMONS__TWO_ENTRIES_NODE__ 2 | #define __HOT__COMMONS__TWO_ENTRIES_NODE__ 3 | 4 | #include 5 | 6 | #include "SingleMaskPartialKeyMapping.hpp" 7 | #include "BiNode.hpp" 8 | 9 | namespace hot { namespace commons { 10 | 11 | template typename NodeTemplate> inline NodeTemplate* createTwoEntriesNode(BiNode const & binaryNode) { 12 | constexpr uint16_t NUMBER_ENTRIES_IN_TWO_ENTRIES_NODE = 2u; 13 | NodeTemplate* node = 14 | new (NUMBER_ENTRIES_IN_TWO_ENTRIES_NODE) NodeTemplate( 15 | binaryNode.mHeight, 16 | NUMBER_ENTRIES_IN_TWO_ENTRIES_NODE, 17 | SingleMaskPartialKeyMapping { DiscriminativeBit { binaryNode.mDiscriminativeBitIndex } } 18 | ); 19 | 20 | node->mPartialKeys.mEntries[0] = 0; 21 | node->mPartialKeys.mEntries[1] = 1; 22 | ChildPointerType* pointers = node->getPointers(); 23 | pointers[0] = binaryNode.mLeft; 24 | pointers[1] = binaryNode.mRight; 25 | return node; 26 | }; 27 | 28 | } } 29 | 30 | #endif -------------------------------------------------------------------------------- /lits/lits_entry.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "lits_base.hpp" 4 | #include "lits_kv.hpp" 5 | 6 | namespace lits { 7 | 8 | class KV { 9 | public: 10 | str k; 11 | val v; 12 | }; 13 | 14 | // These two classes are reponsible for the KV iteration 15 | class KVS1 { 16 | public: 17 | KVS1() = default; 18 | KVS1(const KVS1 &other) = delete; 19 | KV operator[](int index) const { return {d[index]->k, d[index]->v}; } 20 | kv *ret_kv(int index) const { return d[index]; } 21 | void 
push(kv *_kv) { d.push_back(_kv); } 22 | void self_delete() { 23 | for (int i = 0; i < d.size(); ++i) { 24 | free_kv(d[i]); 25 | } 26 | } 27 | int getSize() const { return d.size(); } 28 | 29 | private: 30 | std::vector d; 31 | }; 32 | 33 | class KVS2 { 34 | public: 35 | KVS2(const str *keys, const val *vals) : _keys(keys), _vals(vals) {} 36 | KV operator[](int index) const { return {_keys[index], _vals[index]}; } 37 | kv *ret_kv(int index) const { return new_kv(_keys[index], _vals[index]); } 38 | 39 | private: 40 | const str *_keys; 41 | const val *_vals; 42 | }; 43 | 44 | }; // namespace lits -------------------------------------------------------------------------------- /lits/hot_src/include/BiNodeInformation.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __HOT__COMMONS__BI_NODE_INFORMATION__ 2 | #define __HOT__COMMONS__BI_NODE_INFORMATION__ 3 | 4 | #include 5 | 6 | #include "EntriesRange.hpp" 7 | 8 | namespace hot { namespace commons { 9 | 10 | /** 11 | * The BiNodeInformation, contains all information which is necessary a BiNode and the entries in its subtree for a linearized binary patricia trie. 
12 | * 13 | * It therefore consists of: 14 | * + The positions of the elements contained in its left/right subtree 15 | * + The index of the corresponding discriminative bit 16 | * + A partial key representing the corresponding discriminative bit 17 | */ 18 | struct BiNodeInformation { 19 | uint32_t const mDiscriminativeBitIndex; 20 | uint32_t const mDiscriminativeBitMask; 21 | 22 | EntriesRange const mLeft; 23 | EntriesRange const mRight; 24 | 25 | BiNodeInformation(uint32_t discriminativeBitIndex, uint32_t discriminativeBitMask, EntriesRange const & left, EntriesRange const & right) 26 | : mDiscriminativeBitIndex(discriminativeBitIndex), mDiscriminativeBitMask(discriminativeBitMask), mLeft(left), mRight(right) 27 | { 28 | } 29 | 30 | /** 31 | * @return the total number of entries in the subtree rooted at the described BiNode 32 | */ 33 | uint32_t getTotalNumberEntries() const { 34 | return mLeft.mNumberEntriesInRange + mRight.mNumberEntriesInRange; 35 | } 36 | 37 | }; 38 | 39 | }} 40 | 41 | #endif -------------------------------------------------------------------------------- /lits/lits_base.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // The MAX alphabet size which LITS supports. 
#define MAX_CH 128

// The maximum depth of the insertion stack
#define MAX_STACK 128

// Built-in branch prediction hints.
// `!!(x)` normalizes the condition to 0/1 first, so pointers and other
// non-integral conditions can be passed (the builtin itself takes a long).
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

// Runtime assertion and static assertion.
// ST_ASSERT uses the two-argument static_assert so it also builds with the
// -std=c++14 flag used by the Makefile; the message is the expression text.
#define RT_ASSERT(expr) assert(expr)
#define ST_ASSERT(expr) static_assert(expr, #expr)

// Pointer RAW mask: keeps the low 48 bits of a pointer value.
// ULL guarantees a 64-bit constant on every data model.
#define PTR_MASK 0xffffffffffffULL
#define PTR_RAW(p) ((void *)(((uint64_t)(void *)(p)) & PTR_MASK))

// Debug COUT
#define COUT_VAR(x) std::cout << #x << " = " << (x) << std::endl
#define COUT_THIS(x)                                                           \
    do {                                                                       \
        std::cout << x << std::endl;                                           \
    } while (0)

// The maximum size of cnode
#define CNODE_SIZE 16

// The scale factor of the sparse item array in model-based node
#define ScaleFactor 2

// lits namespace
namespace lits {

// Key type: str
using str = char *;

// Value type: uint64_t
using val = uint64_t;

}; // namespace lits
//This can be used if both fields are copied together with another field using 64bit operations or better simd or avx instructions 21 | PartialKeyMappingBase() { 22 | } 23 | 24 | protected: 25 | PartialKeyMappingBase(uint16_t mostSignificantBitIndex, uint16_t leastSignificantBitIndex) : mMostSignificantDiscriminativeBitIndex(mostSignificantBitIndex), mLeastSignificantDiscriminativeBitIndex(leastSignificantBitIndex) { 26 | } 27 | 28 | PartialKeyMappingBase(PartialKeyMappingBase const existing, DiscriminativeBit const & significantKeyInformation) 29 | : PartialKeyMappingBase( 30 | std::min(existing.mMostSignificantDiscriminativeBitIndex, significantKeyInformation.mAbsoluteBitIndex), 31 | std::max(existing.mLeastSignificantDiscriminativeBitIndex, significantKeyInformation.mAbsoluteBitIndex) 32 | ) 33 | { 34 | } 35 | }; 36 | 37 | } } 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /lits/hot_src/include/PartialKeyMappingHelpers.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __HOT__COMMONS__PARTIAL_KEY_MAPPING_HELPERS__ 2 | #define __HOT__COMMONS__PARTIAL_KEY_MAPPING_HELPERS__ 3 | 4 | #include "DiscriminativeBit.hpp" 5 | 6 | namespace hot { namespace commons { 7 | 8 | 9 | template inline auto extractAndExecuteWithCorrectMaskAndDiscriminativeBitsRepresentation(DiscriminativeBitsRepresentation const & extractionInformation, PartialKeyType compressionMask, Operation const & operation) { 10 | return extractionInformation.extract(compressionMask, [&](auto const &newDiscriminativeBitsRepresentation) { 11 | return newDiscriminativeBitsRepresentation.executeWithCorrectMaskAndDiscriminativeBitsRepresentation(operation); 12 | }); 13 | } 14 | 15 | template inline auto extractAndAddAndExecuteWithCorrectMaskAndDiscriminativeBitsRepresentation(DiscriminativeBitsRepresentation const & extractionInformation, PartialKeyType compressionMask, DiscriminativeBit const & keyInformation, 
/**
 * Helper class to represent values which might not be set, or be in an undefined state.
 *
 * @tparam ValueType the type of the value to store
 * @author robert.binna@uibk.ac.at
 */
template<typename ValueType> struct OptionalValue {
	bool mIsValid;
	ValueType mValue;

	/**
	 * Constructs an optional value which has no value set and is therefore not valid.
	 * mValue is not given an explicit initializer here.
	 */
	inline OptionalValue() : mIsValid(false) {
	}

	/**
	 * Constructs an optional value. Depending on isValid the stored value might be undefined.
	 *
	 * @param isValid only if isValid is true is the value defined and valid
	 * @param value the value to encapsulate
	 */
	inline OptionalValue(bool const & isValid, ValueType const & value) : mIsValid(isValid), mValue(value) {
	}

	/**
	 * Tests whether two optional values comply with each other.
	 * Two optional values comply in case both are invalid or both are valid and share the same value.
	 *
	 * @param expected the optional value to test for compliance with the current value
	 * @return whether both values comply
	 */
	inline bool compliesWith(OptionalValue const & expected) const {
		// Short-circuit on purpose: the stored values are only compared when
		// both sides are valid, so an unset mValue is never read.
		return (this->mIsValid == expected.mIsValid) && (!this->mIsValid || (mValue == expected.mValue));
	}
};
23 | */ 24 | uint32_t const mSubtreePrefixPartialKey; 25 | uint32_t const mFirstIndexInAffectedSubtree; 26 | uint32_t const mNumberEntriesInAffectedSubtree; 27 | DiscriminativeBit const mKeyInformation; 28 | 29 | InsertInformation(uint32_t const subtreePrefixPartialKey, uint32_t const firstIndexInAffectedSubtree, 30 | uint32_t const numberEntriesInAffectedSubtree, DiscriminativeBit const & keyInformation 31 | ) 32 | : mSubtreePrefixPartialKey(subtreePrefixPartialKey), mFirstIndexInAffectedSubtree(firstIndexInAffectedSubtree), 33 | mNumberEntriesInAffectedSubtree(numberEntriesInAffectedSubtree), 34 | mKeyInformation(keyInformation) 35 | { 36 | assert(numberEntriesInAffectedSubtree > 0); 37 | } 38 | 39 | unsigned int getFirstIndexInAffectedSubtree() const { 40 | return mFirstIndexInAffectedSubtree; 41 | } 42 | 43 | unsigned int getNumberEntriesInAffectedSubtree() const { 44 | return mNumberEntriesInAffectedSubtree; 45 | } 46 | 47 | }; 48 | 49 | } } 50 | 51 | #endif -------------------------------------------------------------------------------- /lits/lits_utils.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "lits_base.hpp" 4 | #include "lits_gpkl.hpp" 5 | 6 | namespace lits { 7 | 8 | /** 9 | * Check if the input data is sorted and unique 10 | * @return true if unique and sorted, false otherwise 11 | */ 12 | inline bool checkSortedUnique(const str *keys, const int len) { 13 | for (int i = 1; i < len; ++i) { 14 | if (ustrcmp(keys[i], keys[i - 1]) <= 0) { 15 | return false; 16 | } 17 | } 18 | return true; 19 | } 20 | 21 | /** 22 | * A simple inline hash function for string without go through all bytes. 
/**
 * Return the smallest 2**k which is bigger or equal than n.
 * Return 1 for 0 and 1
 * Return 2 for 2
 * Return 4 for (2, 4]
 * Return 8 for (4, 8]
 *
 * For n above 2**63 the result is not representable in 64 bits and the
 * final increment wraps to 0.
 */
inline uint64_t quick2(uint64_t n) {
    // Without this guard n == 0 would underflow in the decrement below and
    // come back as 0, which is not a power of two.
    if (n <= 1) {
        return 1;
    }
    n--;
    // Smear the highest set bit into every lower position ...
    n |= n >> 1;
    n |= n >> 2;
    n |= n >> 4;
    n |= n >> 8;
    n |= n >> 16;
    n |= n >> 32;
    // ... so that adding one yields the next power of two.
    n++;
    return n;
}
/**
 * A TidToValueConverter converts a tuple identifier to its value representation.
 * Depending on the value representation different template specializations exist.
 * In the default case the most significant bit of the tuple identifier is cleared
 * and the first sizeof(ValueType) bytes of the identifier are copied into the value.
 *
 * @tparam ValueType the type of the target value, at most as large as intptr_t
 */
template<typename ValueType>
class TidToValueConverter {
public:
	__attribute__((always_inline)) inline ValueType operator()(intptr_t tid) {
		static_assert(sizeof(ValueType) <= sizeof(intptr_t), "ValueType must fit into a tuple identifier");
		tid &= INTPTR_MAX;
		// Byte-wise copy instead of dereferencing a reinterpret_cast pointer:
		// same bytes, but without violating the strict aliasing rule.
		ValueType value;
		__builtin_memcpy(&value, &tid, sizeof(ValueType));
		return value;
	}
};

/**
 * Specialization for pointer values: the tuple identifier is the pointer itself.
 *
 * @tparam ValueType the pointee type
 */
template<typename ValueType>
class TidToValueConverter<ValueType*> {
public:
	__attribute__((always_inline)) inline ValueType *operator()(intptr_t tid) {
		return reinterpret_cast<ValueType*>(tid);
	}
};

/**
 * A ValueToTidConverter converts a given value to its tuple identifier.
 * Depending on the value representation different template specializations exist.
 * In the default case the bytes of the value are copied into a zeroed tuple identifier.
 * This is only suitable for value types which are not larger than 8 bytes.
 *
 * @tparam ValueType the type of the value to convert
 */
template<typename ValueType>
class ValueToTidConverter {
public:
	__attribute__((always_inline)) inline intptr_t operator()(ValueType value) {
		static_assert(sizeof(ValueType) <= sizeof(intptr_t), "ValueType must fit into a tuple identifier");
		// Copy only the bytes the value really has; reading a whole intptr_t
		// from a smaller object would run past the end of that object.
		intptr_t tid = 0;
		__builtin_memcpy(&tid, &value, sizeof(ValueType));
		return tid;
	}
};

/**
 * Specialization for pointer values: the pointer itself is the tuple identifier.
 *
 * @tparam ValueType the pointee type
 */
template<typename ValueType>
class ValueToTidConverter<ValueType*> {
public:
	__attribute__((always_inline)) inline intptr_t operator()(ValueType *value) {
		return reinterpret_cast<intptr_t>(value);
	}
};

/**
 * Converts a tuple identifier to a value with the TidToValueConverter matching ValueType.
 */
template<typename ValueType>
__attribute__((always_inline)) inline ValueType tidToValue(intptr_t tid) {
	TidToValueConverter<ValueType> convert;
	return convert(tid);
}

/**
 * Converts a value to its tuple identifier with the ValueToTidConverter matching ValueType.
 */
template<typename ValueType>
__attribute__((always_inline)) inline intptr_t valueToTid(ValueType value) {
	ValueToTidConverter<ValueType> convert;
	return convert(value);
}
"NodeAllocationInformation.hpp" 7 | 8 | namespace hot { namespace commons { 9 | 10 | constexpr size_t MAXIMUM_NUMBER_NODE_ENTRIES = 32; 11 | 12 | 13 | template class NodeAllocationInformations { 14 | static std::array mAllocationInformations; 15 | 16 | public: 17 | static inline NodeAllocationInformation const & getAllocationInformation(size_t numberEntries); 18 | }; 19 | 20 | 21 | template std::array NodeAllocationInformations::mAllocationInformations { 22 | NodeTypename::getNodeAllocationInformation(1), 23 | NodeTypename::getNodeAllocationInformation(2), 24 | NodeTypename::getNodeAllocationInformation(3), 25 | NodeTypename::getNodeAllocationInformation(4), 26 | NodeTypename::getNodeAllocationInformation(5), 27 | NodeTypename::getNodeAllocationInformation(6), 28 | NodeTypename::getNodeAllocationInformation(7), 29 | NodeTypename::getNodeAllocationInformation(8), 30 | NodeTypename::getNodeAllocationInformation(9), 31 | NodeTypename::getNodeAllocationInformation(10), 32 | NodeTypename::getNodeAllocationInformation(11), 33 | NodeTypename::getNodeAllocationInformation(12), 34 | NodeTypename::getNodeAllocationInformation(13), 35 | NodeTypename::getNodeAllocationInformation(14), 36 | NodeTypename::getNodeAllocationInformation(15), 37 | NodeTypename::getNodeAllocationInformation(16), 38 | NodeTypename::getNodeAllocationInformation(17), 39 | NodeTypename::getNodeAllocationInformation(18), 40 | NodeTypename::getNodeAllocationInformation(19), 41 | NodeTypename::getNodeAllocationInformation(20), 42 | NodeTypename::getNodeAllocationInformation(21), 43 | NodeTypename::getNodeAllocationInformation(22), 44 | NodeTypename::getNodeAllocationInformation(23), 45 | NodeTypename::getNodeAllocationInformation(24), 46 | NodeTypename::getNodeAllocationInformation(25), 47 | NodeTypename::getNodeAllocationInformation(26), 48 | NodeTypename::getNodeAllocationInformation(27), 49 | NodeTypename::getNodeAllocationInformation(28), 50 | NodeTypename::getNodeAllocationInformation(29), 51 | 
NodeTypename::getNodeAllocationInformation(30), 52 | NodeTypename::getNodeAllocationInformation(31), 53 | NodeTypename::getNodeAllocationInformation(32) 54 | }; 55 | 56 | template inline NodeAllocationInformation const & NodeAllocationInformations::getAllocationInformation(size_t numberEntries) { 57 | return mAllocationInformations[numberEntries - 1]; 58 | } 59 | 60 | }} 61 | 62 | #endif -------------------------------------------------------------------------------- /lits/hot_src/include/NodeType.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __HOT__COMMONS__NODE_TYPE__ 2 | #define __HOT__COMMONS__NODE_TYPE__ 3 | 4 | #include 5 | #include 6 | 7 | namespace hot { namespace commons { 8 | 9 | enum class NodeType : unsigned int { 10 | SINGLE_MASK_8_BIT_PARTIAL_KEYS = 0, 11 | SINGLE_MASK_16_BIT_PARTIAL_KEYS = 1, 12 | SINGLE_MASK_32_BIT_PARTIAL_KEYS = 2, 13 | MULTI_MASK_8_BYTES_AND_8_BIT_PARTIAL_KEYS = 3, 14 | MULTI_MASK_8_BYTES_AND_16_BIT_PARTIAL_KEYS = 4, 15 | MULTI_MASK_8_BYTES_AND_32_BIT_PARTIAL_KEYS = 5, 16 | MULTI_MASK_16_BYTES_AND_16_BIT_PARTIAL_KEYS = 6, 17 | MULTI_MASK_32_BYTES_AND_32_BIT_PARTIAL_KEYS = 7 18 | }; 19 | 20 | inline NodeType getRandomNodeType(uint numberKeyBits, uint numberRandomBytes) { 21 | //log2 + shift right ---> numberBytes 0-7 > 0, numberBytes 7 - 16 > 1, numberBytes 16 - 32 > 2 (NOTE(review): for numberKeyBits in 1..8 the __builtin_clz argument is 0, for which GCC documents the result as undefined; the 0 case described here relies on clz(0) == 32 - confirm for the target, same applies to numberRandomBytes below) 22 | uint32_t numberMaskBytes = (32 - __builtin_clz((numberKeyBits - 1)/8)); 23 | 24 | //log2 + shift right ---> numberBytes 0-7 > 0, numberBytes 7 - 16 > 1, numberBytes 16 - 32 > 2 25 | uint32_t numberExtractionMasks = (32 - __builtin_clz((numberRandomBytes-1)/8)); 26 | 27 | uint moreThanASingleExtractionMask = (numberExtractionMasks > 0); 28 | 29 | //because random masks start at 3 + numberMaskBytes (counted from 0) see comment in first line of this function 30 | // + in case the numberExtractionMasks is larger than 8 the number of mask bits determine the number of random bytes 31 | // Hence adding if the mask
size is 16 (4) adding 2 will result in 6 which is for MULTI_MASK_16_BYTES_AND_16_BIT_PARTIAL_KEYS. This mus be correct as the number of random bytes can never be larger than the number of key bits 32 | // Therefore in the case of mask size 32 (5) adding 2 will result in 7 which will be correct in cases where the number of extraction bytes is below 16 as well as above 16 because in both cases the only matching extraction mask is MULTI_MASK_32_BYTES_AND_32_BIT_PARTIAL_KEYS 33 | return static_cast(3 + numberMaskBytes + (moreThanASingleExtractionMask * 2)); 34 | } 35 | 36 | inline std::string nodeAlgorithmToString(NodeType nodeAlgorithmType) { 37 | switch(nodeAlgorithmType) { 38 | case NodeType::SINGLE_MASK_8_BIT_PARTIAL_KEYS: 39 | return { "SINGLE_MASK_8_BIT_PARTIAL_KEYS " }; 40 | case NodeType::SINGLE_MASK_16_BIT_PARTIAL_KEYS: 41 | return { "SINGLE_MASK_16_BIT_PARTIAL_KEYS " }; 42 | case NodeType::SINGLE_MASK_32_BIT_PARTIAL_KEYS: 43 | return { "SINGLE_MASK_32_BIT_PARTIAL_KEYS " }; 44 | case NodeType::MULTI_MASK_8_BYTES_AND_8_BIT_PARTIAL_KEYS: 45 | return { "MULTI_MASK_8_BYTES_AND_8_BIT_PARTIAL_KEYS " }; 46 | case NodeType::MULTI_MASK_8_BYTES_AND_16_BIT_PARTIAL_KEYS: 47 | return { "MULTI_MASK_8_BYTES_AND_16_BIT_PARTIAL_KEYS " }; 48 | case NodeType::MULTI_MASK_8_BYTES_AND_32_BIT_PARTIAL_KEYS: 49 | return { "MULTI_MASK_8_BYTES_AND_32_BIT_PARTIAL_KEYS " }; 50 | case NodeType::MULTI_MASK_16_BYTES_AND_16_BIT_PARTIAL_KEYS: 51 | return { "MULTI_MASK_16_BYTES_AND_16_BIT_PARTIAL_KEYS " }; 52 | default: // MULTI_MASK_32_BYTES_AND_32_BIT_PARTIAL_KEYS: 53 | return { "MULTI_MASK_32_BYTES_AND_32_BIT_PARTIAL_KEYS " }; 54 | } 55 | } 56 | 57 | }} 58 | 59 | #endif -------------------------------------------------------------------------------- /lits/hot_src/include/DiscriminativeBit.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __HOT__COMMONS__DISCRIMINATIVE_BIT__ 2 | #define __HOT__COMMONS__DISCRIMINATIVE_BIT__ 3 | 4 | #include 
"Algorithms.hpp" 5 | #include "OptionalValue.hpp" 6 | 7 | namespace hot { namespace commons { 8 | 9 | constexpr uint8_t BYTE_WITH_MOST_SIGNIFICANT_BIT = 0b10000000; 10 | 11 | /** 12 | * Describes a keys single bit by its position and its value 13 | */ 14 | struct DiscriminativeBit { 15 | uint16_t const mByteIndex; 16 | uint16_t const mByteRelativeBitIndex; 17 | uint16_t const mAbsoluteBitIndex; 18 | uint mValue; 19 | 20 | 21 | public: 22 | inline DiscriminativeBit(uint16_t const significantByteIndex, uint8_t const existingByte, uint8_t const newKeyByte); 23 | inline DiscriminativeBit(uint16_t const absoluteSignificantBitIndex, uint const newBitValue=1); 24 | 25 | inline uint8_t getExtractionByte() const; 26 | 27 | private: 28 | static inline uint16_t getByteRelativeSignificantBitIndex(uint8_t const existingByte, uint8_t const newKeyByte); 29 | 30 | }; 31 | 32 | inline DiscriminativeBit::DiscriminativeBit(uint16_t const significantByteIndex, uint8_t const existingByte, uint8_t const newKeyByte) 33 | : mByteIndex(significantByteIndex) 34 | , mByteRelativeBitIndex(getByteRelativeSignificantBitIndex(existingByte, newKeyByte)) 35 | , mAbsoluteBitIndex(convertBytesToBits(mByteIndex) + mByteRelativeBitIndex) 36 | , mValue(((BYTE_WITH_MOST_SIGNIFICANT_BIT >> mByteRelativeBitIndex) & newKeyByte) != 0) 37 | { 38 | } 39 | 40 | inline DiscriminativeBit::DiscriminativeBit(uint16_t const absoluteSignificantBitIndex, uint const newBitValue) 41 | : mByteIndex(getByteIndex(absoluteSignificantBitIndex)) 42 | , mByteRelativeBitIndex(bitPositionInByte(absoluteSignificantBitIndex)) 43 | , mAbsoluteBitIndex(absoluteSignificantBitIndex) 44 | , mValue(newBitValue) 45 | { 46 | } 47 | 48 | inline uint8_t DiscriminativeBit::getExtractionByte() const { 49 | return 1 << (7 - mByteRelativeBitIndex); 50 | } 51 | 52 | inline uint16_t DiscriminativeBit::getByteRelativeSignificantBitIndex(uint8_t const existingByte, uint8_t const newKeyByte) { 53 | uint32_t mismatchByteBitMask = existingByte ^ 
newKeyByte; 54 | return __builtin_clz(mismatchByteBitMask) - 24; 55 | } 56 | 57 | template inline bool executeForDiffingKeys(uint8_t const* existingKey, uint8_t const* newKey, uint16_t keyLengthInBytes, Operation const & operation) { 58 | for(size_t index = 0; index < keyLengthInBytes; ++index) { 59 | uint8_t newByte = newKey[index]; 60 | uint8_t existingByte = existingKey[index]; 61 | if(existingByte != newByte) { 62 | operation(DiscriminativeBit {static_cast(index), existingByte, newByte }); 63 | return true; 64 | } 65 | } 66 | return false; 67 | }; 68 | 69 | inline idx::contenthelpers::OptionalValue getMismatchingBit(uint8_t const* existingKey, uint8_t const* newKey, uint16_t keyLengthInBytes) { 70 | for(size_t index = 0; index < keyLengthInBytes; ++index) { 71 | uint8_t newByte = newKey[index]; 72 | uint8_t existingByte = existingKey[index]; 73 | if(existingByte != newByte) { 74 | return { true, DiscriminativeBit { static_cast(index), existingByte, newByte } }; 75 | } 76 | } 77 | return { false, { 0, 0, 0 }}; 78 | }; 79 | 80 | } } 81 | 82 | #endif 83 | -------------------------------------------------------------------------------- /lits/hot_src/HOTSingleThreadedNodeBase.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __HOT__SINGLE_THREADED__HOT_SINGLE_THREADED_NODE_BASE__ 2 | #define __HOT__SINGLE_THREADED__HOT_SINGLE_THREADED_NODE_BASE__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "include/NodeAllocationInformation.hpp" 9 | 10 | #include "MemoryPool.hpp" 11 | #include "HOTSingleThreadedNodeBaseInterface.hpp" 12 | #include "HOTSingleThreadedChildPointerInterface.hpp" 13 | 14 | constexpr uint32_t ALL_ENTRIES_USED = UINT32_MAX; //32 1 bits 15 | 16 | namespace hot { namespace singlethreaded { 17 | 18 | inline MemoryPool* HOTSingleThreadedNodeBase::getMemoryPool() { 19 | static MemoryPool memoryPool {}; 20 | return &memoryPool; 21 | } 22 | 23 | 
HOTSingleThreadedNodeBase::HOTSingleThreadedNodeBase(uint16_t const level, hot::commons::NodeAllocationInformation const & nodeAllocationInformation) 24 | : mFirstChildPointer(reinterpret_cast(reinterpret_cast(this) + nodeAllocationInformation.mPointerOffset)), mUsedEntriesMask(nodeAllocationInformation.mEntriesMask), mHeight(level) { 25 | } 26 | 27 | inline __attribute__((always_inline)) size_t HOTSingleThreadedNodeBase::getNumberEntries() const { 28 | return __builtin_popcount(mUsedEntriesMask); 29 | } 30 | 31 | inline __attribute__((always_inline)) bool HOTSingleThreadedNodeBase::isFull() const { 32 | return mUsedEntriesMask == ALL_ENTRIES_USED; 33 | } 34 | 35 | inline __attribute__((always_inline)) HOTSingleThreadedChildPointer const * HOTSingleThreadedNodeBase::toResult( uint32_t const resultMask) const { 36 | return getPointers() + toResultIndex(resultMask); 37 | } 38 | 39 | inline __attribute__((always_inline)) HOTSingleThreadedChildPointer* HOTSingleThreadedNodeBase::toResult( uint32_t const resultMask) { 40 | size_t const resultIndex = toResultIndex(resultMask); 41 | return getPointers() + resultIndex; 42 | } 43 | 44 | inline __attribute__((always_inline)) unsigned int HOTSingleThreadedNodeBase::toResultIndex( uint32_t resultMask ) const { 45 | assert(resultMask != 0); 46 | uint32_t const resultMaskForChildsOnly = mUsedEntriesMask & resultMask; 47 | return hot::commons::getMostSignificantBitIndex(resultMaskForChildsOnly); 48 | } 49 | 50 | inline size_t HOTSingleThreadedNodeBase::getNumberAllocations() { 51 | return getMemoryPool()->getNumberAllocations(); 52 | } 53 | 54 | 55 | inline HOTSingleThreadedChildPointer * HOTSingleThreadedNodeBase::getPointers() { 56 | return mFirstChildPointer; 57 | } 58 | 59 | inline HOTSingleThreadedChildPointer const * HOTSingleThreadedNodeBase::getPointers() const { 60 | return mFirstChildPointer; 61 | } 62 | 63 | inline typename HOTSingleThreadedNodeBase::iterator HOTSingleThreadedNodeBase::begin() 64 | { 65 | return 
getPointers(); 66 | } 67 | 68 | inline typename HOTSingleThreadedNodeBase::iterator HOTSingleThreadedNodeBase::end() 69 | { 70 | return getPointers() + getNumberEntries(); 71 | } 72 | 73 | inline typename HOTSingleThreadedNodeBase::const_iterator HOTSingleThreadedNodeBase::begin() const 74 | { 75 | return getPointers(); 76 | } 77 | 78 | inline typename HOTSingleThreadedNodeBase::const_iterator HOTSingleThreadedNodeBase::end() const 79 | { 80 | return getPointers() + getNumberEntries(); 81 | } 82 | 83 | } } 84 | 85 | #endif 86 | -------------------------------------------------------------------------------- /lits/hot_src/include/KeyUtilities.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __IDX__CONTENTHELPERS__KEY_UTILITIES__HPP__ 2 | #define __IDX__CONTENTHELPERS__KEY_UTILITIES__HPP__ 3 | 4 | /** @author robert.binna@uibk.ac.at */ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | namespace idx { namespace contenthelpers { 11 | 12 | /** 13 | * returns a big endian representation of the given key. 
If the key is already in big endian representation the key itself will be returned 14 | * 15 | * @tparam KeyType the type of the key to convert 16 | * @param key the key to convert 17 | * @return the big endian key representation 18 | */ 19 | template inline auto toBigEndianByteOrder(KeyType const & key) { 20 | return key; 21 | }; 22 | 23 | template<> __attribute__((always_inline)) inline auto toBigEndianByteOrder(uint64_t const & key) { 24 | return __bswap_64(key); 25 | }; 26 | 27 | template<> inline auto toBigEndianByteOrder(uint32_t const & key) { 28 | return __bswap_32(key); 29 | }; 30 | 31 | template<> inline auto toBigEndianByteOrder(uint16_t const & key) { 32 | return __bswap_16(key); 33 | }; 34 | 35 | /** 36 | * @tparam KeyType the type of the key to determine the maximum key length 37 | * @return the maximum length of a key of a given type 38 | */ 39 | template constexpr inline __attribute__((always_inline)) size_t getMaxKeyLength() { 40 | return sizeof(KeyType); 41 | } 42 | 43 | constexpr size_t MAX_STRING_KEY_LENGTH = 255; 44 | template<> constexpr inline size_t getMaxKeyLength() { 45 | return MAX_STRING_KEY_LENGTH; 46 | } 47 | 48 | /** 49 | * determines the key length in bytes for a given key 50 | * 51 | * @tparam KeyType the type of the key 52 | * @param key the key to get its length in bytes for 53 | * @return the key length in bytes 54 | */ 55 | template inline size_t getKeyLength(KeyType const & key) { 56 | return getMaxKeyLength(); 57 | } 58 | 59 | template<> inline size_t getKeyLength(char const * const & key) { 60 | return std::min(strlen(key) + 1u, MAX_STRING_KEY_LENGTH); 61 | } 62 | 63 | template inline __attribute__((always_inline)) auto toFixSizedKey(KeyType const & key) { 64 | return key; 65 | } 66 | 67 | /** 68 | * return a fixed size key. A fixed sized key is a deterministic length representation for a key.
69 | * For instance for cstrings it is a MAX_STRING_KEY_LENGTH (255) byte long key representation (the array extent and the strncpy length both come from getMaxKeyLength). strncpy zero-pads shorter keys; keys of that length or longer are truncated and stored without a terminating NUL. 70 | * 71 | * @tparam KeyType the type of the key 72 | * @param key the key to convert to fixed size 73 | * @return the fixed sized key 74 | */ 75 | template<> inline auto toFixSizedKey(char const * const & key) { 76 | std::array()> fixedSizeKey; 77 | strncpy(reinterpret_cast(fixedSizeKey.data()), key, getMaxKeyLength()); 78 | return fixedSizeKey; 79 | } 80 | 81 | /** 82 | * Gets a pointer to the key bytes of the given key. 83 | * Be aware that this pointer is only valid as long as keyType is valid!! 84 | * 85 | * @tparam KeyType the type of the key 86 | * @param key the key to get the byte wise representation for 87 | * @return the byte wise representation of the key 88 | */ 89 | template __attribute__((always_inline)) inline uint8_t const * interpretAsByteArray(KeyType const & key) { 90 | return reinterpret_cast(&key); 91 | } 92 | 93 | template<> inline uint8_t const* interpretAsByteArray(const char * const & cStringKey) { 94 | return reinterpret_cast(cStringKey); 95 | } 96 | 97 | }} 98 | 99 | #endif -------------------------------------------------------------------------------- /lits/hot_src/include/NodeParametersMapping.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __HOT__COMMONS__NODE_PARAMETERS_MAPPING__ 2 | #define __HOT__COMMONS__NODE_PARAMETERS_MAPPING__ 3 | 4 | #include "NodeType.hpp" 5 | #include "MultiMaskPartialKeyMappingInterface.hpp" 6 | #include "SingleMaskPartialKeyMappingInterface.hpp" 7 | 8 | namespace hot { namespace commons { 9 | 10 | template struct NodeParametersToNodeType { 11 | }; 12 | 13 | template<> struct NodeParametersToNodeType { 14 | static constexpr NodeType mNodeType { NodeType ::SINGLE_MASK_8_BIT_PARTIAL_KEYS }; 15 | }; 16 | 17 | template<> struct NodeParametersToNodeType { 18 | static constexpr NodeType mNodeType { NodeType ::SINGLE_MASK_16_BIT_PARTIAL_KEYS }; 19 | }; 20 | 21 | template<> struct
NodeParametersToNodeType { 22 | static constexpr NodeType mNodeType { NodeType ::SINGLE_MASK_32_BIT_PARTIAL_KEYS }; 23 | }; 24 | 25 | template<> struct NodeParametersToNodeType, uint8_t> { 26 | static constexpr NodeType mNodeType { NodeType ::MULTI_MASK_8_BYTES_AND_8_BIT_PARTIAL_KEYS }; 27 | }; 28 | 29 | template<> struct NodeParametersToNodeType, uint16_t> { 30 | static constexpr NodeType mNodeType { NodeType ::MULTI_MASK_8_BYTES_AND_16_BIT_PARTIAL_KEYS }; 31 | }; 32 | 33 | template<> struct NodeParametersToNodeType, uint32_t> { 34 | static constexpr NodeType mNodeType { NodeType ::MULTI_MASK_8_BYTES_AND_32_BIT_PARTIAL_KEYS }; 35 | }; 36 | 37 | template<> struct NodeParametersToNodeType, uint16_t> { 38 | static constexpr NodeType mNodeType { NodeType ::MULTI_MASK_16_BYTES_AND_16_BIT_PARTIAL_KEYS }; 39 | }; 40 | 41 | template<> struct NodeParametersToNodeType, uint32_t> { 42 | static constexpr NodeType mNodeType { NodeType ::MULTI_MASK_32_BYTES_AND_32_BIT_PARTIAL_KEYS }; 43 | }; 44 | 45 | template struct NodeTypeToNodeParameters { 46 | }; 47 | 48 | template<> struct NodeTypeToNodeParameters { 49 | using PartialKeyMappingType = SingleMaskPartialKeyMapping; 50 | using PartialKeyType = uint8_t; 51 | }; 52 | 53 | template<> struct NodeTypeToNodeParameters { 54 | using PartialKeyMappingType = SingleMaskPartialKeyMapping; 55 | using PartialKeyType = uint16_t; 56 | }; 57 | 58 | template<> struct NodeTypeToNodeParameters { 59 | using PartialKeyMappingType = SingleMaskPartialKeyMapping; 60 | using PartialKeyType = uint32_t; 61 | }; 62 | 63 | template<> struct NodeTypeToNodeParameters { 64 | using PartialKeyMappingType = MultiMaskPartialKeyMapping<1u>; 65 | using PartialKeyType = uint8_t; 66 | }; 67 | 68 | template<> struct NodeTypeToNodeParameters { 69 | using PartialKeyMappingType = MultiMaskPartialKeyMapping<1u>; 70 | using PartialKeyType = uint16_t; 71 | }; 72 | 73 | template<> struct NodeTypeToNodeParameters { 74 | using PartialKeyMappingType = 
MultiMaskPartialKeyMapping<1u>; 75 | using PartialKeyType = uint32_t; 76 | }; 77 | 78 | template<> struct NodeTypeToNodeParameters { 79 | using PartialKeyMappingType = MultiMaskPartialKeyMapping<2u>; 80 | using PartialKeyType = uint16_t; 81 | }; 82 | 83 | template<> struct NodeTypeToNodeParameters { 84 | using PartialKeyMappingType = MultiMaskPartialKeyMapping<4u>; 85 | using PartialKeyType = uint32_t; 86 | }; 87 | 88 | }} 89 | 90 | #endif -------------------------------------------------------------------------------- /lits/hot_src/MemoryPool.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __HOT__SINGLE_THREADED__MEMORY_POOL__ 2 | #define __HOT__SINGLE_THREADED__MEMORY_POOL__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace hot { namespace singlethreaded { 9 | 10 | class FreeListEntry; 11 | 12 | class FreeListEntry { 13 | private: 14 | FreeListEntry* mNext; 15 | size_t mListSize; 16 | 17 | public: 18 | FreeListEntry() : mNext(nullptr), mListSize(0) { 19 | 20 | } 21 | 22 | FreeListEntry(FreeListEntry* next) : mNext(next), mListSize(next->mListSize + 1) { 23 | } 24 | 25 | FreeListEntry* getNext() { 26 | return mNext; 27 | } 28 | 29 | size_t getListSize() { 30 | return mListSize; 31 | } 32 | }; 33 | 34 | template class MemoryPool; 35 | 36 | template class MemoryPool { 37 | static constexpr size_t SIZE_BEFORE_EVICTION_BEGIN_SIZE = EVICTION_BEGIN_SIZE - 1u; 38 | static constexpr size_t ELEMENT_SIZE = sizeof(ElementType); 39 | static FreeListEntry TERMINATING_ENTRY; 40 | 41 | std::array mFreeLists; 42 | size_t mNumberAllocations; 43 | size_t mNumberFrees; 44 | 45 | public: 46 | MemoryPool() : mNumberAllocations(0ul), mNumberFrees(0ul) { 47 | std::fill(mFreeLists.begin(), mFreeLists.end(), &TERMINATING_ENTRY); 48 | } 49 | 50 | MemoryPool(MemoryPool const & other) = delete; 51 | MemoryPool& operator=(MemoryPool const & other) = delete; 52 | 53 | ~MemoryPool() { 54 | for(FreeListEntry** freeList = 
mFreeLists.begin(); freeList != mFreeLists.end(); ++freeList) { 55 | while((*freeList)->getListSize() > 0) { 56 | *freeList = freeEntry(*freeList); 57 | } 58 | } 59 | } 60 | 61 | void* alloc(size_t numberElements) { 62 | FreeListEntry* & head = getFreeListHead(numberElements); 63 | 64 | void* rawMemory; 65 | if(head->getListSize() == 0) { 66 | ++mNumberAllocations; 67 | int error = posix_memalign(&rawMemory, sizeof(ElementType), numberElements * sizeof(ElementType)); 68 | if(error != 0) { 69 | //"Got error on alignment" 70 | throw std::bad_alloc(); 71 | } 72 | } else { 73 | rawMemory = reinterpret_cast(head); 74 | head = head->getNext(); 75 | } 76 | 77 | return rawMemory; 78 | } 79 | 80 | void returnToPool(size_t numberElements, void* rawMemory) { 81 | FreeListEntry* & head = getFreeListHead(numberElements); 82 | if(head->getListSize() < SIZE_BEFORE_EVICTION_BEGIN_SIZE) { 83 | head = new (rawMemory) FreeListEntry(head); 84 | } else { 85 | free(rawMemory); 86 | ++mNumberFrees; 87 | while (head->getListSize() > EVICTION_END_SIZE) { 88 | head = freeEntry(head); 89 | } 90 | } 91 | } 92 | 93 | size_t getNumberAllocations() const { 94 | return mNumberAllocations; 95 | } 96 | 97 | size_t getNumberFrees() const { 98 | return mNumberFrees; 99 | } 100 | 101 | private: 102 | FreeListEntry* freeEntry(FreeListEntry* head) { 103 | assert(head->getListSize() != 0u); 104 | FreeListEntry* next = head->getNext(); 105 | free(head); 106 | ++mNumberFrees; 107 | return next; 108 | } 109 | 110 | FreeListEntry* & getFreeListHead(size_t numberElements) { 111 | return mFreeLists[numberElementsToFreeListId(numberElements)]; 112 | } 113 | 114 | size_t numberElementsToFreeListId(size_t numberElements) { 115 | assert(numberElements > 0); 116 | assert(numberElements <= NUMBER_LISTS); 117 | 118 | return numberElements - 1; 119 | } 120 | }; 121 | 122 | template 123 | FreeListEntry MemoryPool::TERMINATING_ENTRY {}; 124 | 125 | }} 126 | 127 | #endif 
-------------------------------------------------------------------------------- /lits/hot_src/include/Algorithms.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __HOT__COMMONS__ALGORITHMS__ 2 | #define __HOT__COMMONS__ALGORITHMS__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | namespace hot { namespace commons { 12 | 13 | inline uint32_t getBytesUsedInExtractionMask(uint64_t successiveExtractionMask) { 14 | uint32_t const unsetBytes = _mm_movemask_pi8(_mm_cmpeq_pi8(_mm_and_si64(_mm_set_pi64x(successiveExtractionMask), _mm_set_pi64x(UINT64_MAX)), _mm_setzero_si64())); 15 | //8 - numberUnsetBytes 16 | return 8 - _mm_popcnt_u32(unsetBytes); 17 | } 18 | 19 | inline uint16_t getMaximumMaskByteIndex(uint16_t bitsUsed) { 20 | return (bitsUsed - 1)/8; 21 | } 22 | 23 | template inline std::array getUsedExtractionBitsForMask(uint32_t usedBits, uint64_t const * extractionMask); 24 | 25 | template inline __m256i extractionMaskToRegister(std::array const & extractionData); 26 | 27 | template<> inline __m256i extractionMaskToRegister<1>(std::array const & extractionData) { 28 | return _mm256_set_epi64x(extractionData[0], 0ul, 0ul, 0ul); 29 | }; 30 | 31 | template<> inline __m256i extractionMaskToRegister<2>(std::array const & extractionData) { 32 | return _mm256_set_epi64x(extractionData[0], extractionData[1], 0ul, 0ul); 33 | } 34 | 35 | template<> inline __m256i extractionMaskToRegister<4>(std::array const & extractionData) { 36 | return _mm256_loadu_si256(reinterpret_cast<__m256i const *>(extractionData.data())); 37 | } 38 | 39 | template inline std::array extractSuccesiveFromRandomBytes(uint8_t const * bytes, uint8_t const * bytePositions) { 40 | std::array succesiveBytes; 41 | for(uint i=0; i < numberBytes; ++i) { 42 | succesiveBytes[i] = bytes[bytePositions[i]]; 43 | } 44 | return std::move(succesiveBytes); 45 | } 46 | 47 | 48 | /** 49 | * Given a bitindex this function returns its 
corresponding byte index 50 | * 51 | * @param bitIndex the bit index to convert to byte Level 52 | * @return the byte index 53 | */ 54 | inline unsigned int getByteIndex(unsigned int bitIndex) { 55 | return bitIndex/8; 56 | } 57 | 58 | 59 | /** 60 | * gets the number of bytes needed to represent the successive bytes from (inclusive) the byte containing 61 | * the mostSignificantBitIndex until (inclusive) the byte containing the leastSignificantBitIndex 62 | * 63 | * @param mostSignificantBitIndex the index of the most significant bit 64 | * @param leastSignificantBitIndex the index of the least significant bit 65 | * @return the size of the range in bytes. NOTE(review): the code returns the plain difference of the two byte indices (0 when both bits lie in the same byte), which is one less than the inclusive count described above - confirm which of the two the callers expect. 66 | */ 67 | inline uint getByteRangeSize(uint mostSignificantBitIndex, uint leastSignificantBitIndex) { 68 | return getByteIndex(leastSignificantBitIndex) - getByteIndex(mostSignificantBitIndex); 69 | } 70 | 71 | constexpr inline uint16_t convertBytesToBits(uint16_t const byteIndex) { 72 | return byteIndex * 8; 73 | } 74 | 75 | constexpr inline uint16_t bitPositionInByte(uint16_t const absolutBitPosition) { 76 | return absolutBitPosition % 8; 77 | } 78 | 79 | constexpr uint64_t HIGHEST_UINT64_BIT = (1ul << 63); 80 | 81 | inline uint getSuccesiveByteOffsetForMostRightByte(uint mostRightByte) { 82 | return std::max(0, ((int) mostRightByte) - 7); 83 | } 84 | 85 | inline bool isNoMissmatch(std::pair const & missmatch, uint8_t const* key1, uint8_t const* key2, size_t keyLength) { 86 | return missmatch.first == (key1 + keyLength) && missmatch.second == (key2 + keyLength); 87 | } 88 | 89 | inline bool isBitSet(uint8_t const * existingRawKey, uint16_t const mAbsoluteBitIndex) { 90 | return (existingRawKey[getByteIndex(mAbsoluteBitIndex)] & (0b10000000 >> bitPositionInByte(mAbsoluteBitIndex))) > 0; 91 | } 92 | 93 | inline uint16_t getLeastSignificantBitIndexInByte(uint8_t byte) { 94 | return (7 - _tzcnt_u32(byte)); 95 | } 96 | 97 | inline uint16_t getMostSignificantBitIndexInByte(uint8_t byte) { 98 | assert(byte > 0); 99
| return _lzcnt_u32(byte) - 24; 100 | } 101 | 102 | inline __attribute__((always_inline)) int getMostSignificantBitIndex(uint32_t number) { 103 | int msb; 104 | asm("bsr %1,%0" : "=r"(msb) : "r"(number)); 105 | return msb; 106 | } 107 | 108 | } } 109 | 110 | #endif 111 | -------------------------------------------------------------------------------- /lits/hot_src/HOTSingleThreadedDeletionInformation.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __HOT__SINGLETHREADED__HOT_SINGLE_THREADED_DELETION_INFORMATION__ 2 | #define __HOT__SINGLETHREADED__HOT_SINGLE_THREADED_DELETION_INFORMATION__ 3 | 4 | #include "include/BiNodeInformation.hpp" 5 | 6 | #include "HOTSingleThreadedChildPointerInterface.hpp" 7 | #include "HOTSingleThreadedNodeBaseInterface.hpp" 8 | 9 | namespace hot { namespace singlethreaded { 10 | 11 | /** 12 | * The deletion information required to create a new node by removing an entry from a given node. 13 | * It therefore contains: 14 | * - information required to compress and recode existing partial keys 15 | * - the index of the entry to remove 16 | * - information regarding the BiNode affected by the deletion operation consisting of: 17 | * + the position of the entries in this BiNode 18 | * + the position of the discriminative bit corresponding to this BiNode 19 | * + the side of the BiNode the entry to delete was contained in (mDiscriminativeBitValueForEntry=0 => left, mDiscriminativeBitValueForEntry=1 => right) 20 | * - a potential direct neighbour of the entry to delete (this is important for merge operations, or BiNode pull down => in both case this information must be taken from the parent's node deletion information) 21 | */ 22 | class HOTSingleThreadedDeletionInformation { 23 | HOTSingleThreadedChildPointer mContainingNode; 24 | uint32_t mCompressionMask; 25 | uint32_t mIndexOfEntryToRemove; 26 | uint32_t mDiscriminativeBitValueForEntry; 27 | 28 | hot::commons::BiNodeInformation 
mBiNodeInformation; 29 | HOTSingleThreadedChildPointer* mPotentialDirectNeighbour; 30 | 31 | 32 | public: 33 | HOTSingleThreadedDeletionInformation(HOTSingleThreadedChildPointer const & containingNode, uint32_t compressionMask, uint32_t indexOfEntryToRemove, hot::commons::BiNodeInformation const & biNodeInformation) 34 | : mContainingNode(containingNode), 35 | mCompressionMask(compressionMask), 36 | mIndexOfEntryToRemove(indexOfEntryToRemove), 37 | mDiscriminativeBitValueForEntry(1 - (biNodeInformation.mRight.mFirstIndexInRange - indexOfEntryToRemove)), 38 | mBiNodeInformation(biNodeInformation), 39 | mPotentialDirectNeighbour(determineDirectNeighbourIfAvailable()) 40 | { 41 | } 42 | 43 | /** 44 | * 45 | * @return whether the entry to delete has a direct neighbour. An entry has a direct neighbour if its sibling BiNode is a leaf BiNode and therefore either points to an actual leaf value or a child HOT node. This is the case exactly when the affected BiNode covers two entries in total. 46 | */ 47 | bool hasDirectNeighbour() const { 48 | return mBiNodeInformation.getTotalNumberEntries() == 2; 49 | } 50 | 51 | /** 52 | * 53 | * @return if the entry to delete has a direct neighbour this function returns a pointer to the direct neighbour. Otherwise it returns nullptr. 54 | */ 55 | HOTSingleThreadedChildPointer* getDirectNeighbourIfAvailable() const { 56 | return mPotentialDirectNeighbour; 57 | } 58 | 59 | /** 60 | * 61 | * @return the mask which can be used to recode/compress the partial keys for the required partial key representation in a new node without the entry to remove 62 | */ 63 | uint32_t getCompressionMask() const { 64 | return mCompressionMask; 65 | } 66 | 67 | /** 68 | * @return the index of the entry to delete in the original node 69 | */ 70 | uint32_t getIndexOfEntryToRemove() const { 71 | return mIndexOfEntryToRemove; 72 | } 73 | 74 | /** 75 | * @return whether the entry to delete is in the left or right subtree of the subtree rooted at the affected BiNode.
76 | */ 77 | uint32_t getDiscriminativeBitValueForEntry() const { 78 | return mDiscriminativeBitValueForEntry; 79 | } 80 | 81 | /** 82 | * When recursively deleting entries, triggered by a merge or BiNode pushdown: 83 | * + first the sibling node is replaced from the parent 84 | * + second the remaining entry of two affected entries will be replaced with the merged node 85 | * + finally the newly created parent node is integrated into the structure 86 | * 87 | * @return the index of the remaining entry which must be replaced in the second step 88 | */ 89 | uint32_t getIndexOfEntryToReplace() const { 90 | return mIndexOfEntryToRemove - mDiscriminativeBitValueForEntry; 91 | } 92 | 93 | hot::commons::BiNodeInformation const & getAffectedBiNode() const { 94 | return mBiNodeInformation; 95 | } 96 | 97 | /** 98 | * @return a pointer to the node which contains the entry to delete 99 | */ 100 | HOTSingleThreadedChildPointer const & getContainingNode() const { 101 | return mContainingNode; 102 | } 103 | 104 | private: 105 | HOTSingleThreadedChildPointer* determineDirectNeighbourIfAvailable() const { 106 | uint32_t directNeighbourIndex = mBiNodeInformation.mRight.mFirstIndexInRange - mDiscriminativeBitValueForEntry; 107 | return hasDirectNeighbour() ? 
mContainingNode.getNode()->getPointers() + directNeighbourIndex : nullptr; 108 | } 109 | }; 110 | 111 | }} 112 | 113 | #endif -------------------------------------------------------------------------------- /lits/lits_hot.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "lits_base.hpp" 4 | #include "lits_kv.hpp" 5 | 6 | // For HOT tail trie 7 | // #include "hot_src/HOTSingleThreaded.hpp" 8 | // #include "hot_src/HOTSingleThreadedInterface.hpp" 9 | #include "hot_src/HOTSingleThreaded.hpp" 10 | #include "hot_src/HOTSingleThreadedInterface.hpp" 11 | 12 | #include 13 | 14 | namespace lits { 15 | 16 | // Key extractor used in HOT 17 | template class KeyExtracter { 18 | public: 19 | using KeyType = char const *; 20 | KeyType operator()(const T &value) { return value._kv->k; } 21 | KeyType operator()(const KeyType k) { return k; } 22 | }; 23 | 24 | using HOTIter = hot::singlethreaded::HOTSingleThreadedIterator; 25 | using HOTIndex = hot::singlethreaded::HOTSingleThreaded; 26 | 27 | // Assert the size of HOTIndex, for single threaded LITS, the sizeof HOTIndex 28 | // must be 8 bytes 29 | ST_ASSERT(sizeof(HOTIndex) == sizeof(uint64_t)); 30 | 31 | /** 32 | * @brief Find a key in the HOTIndex. 33 | * 34 | * @param index The HOTIndex object to be searched. 35 | * @param k The key to be found. 36 | * 37 | * @return An iterator to the found key or index.end() if not found. 38 | */ 39 | inline auto HOTFind(const HOTIndex &index, const str k) -> HOTIter { 40 | return index.find(k); 41 | } 42 | 43 | /** 44 | * @brief Returns an iterator to the first element of the HOTIndex. 45 | * 46 | * @param index The HOTIndex object to be iterated over. 47 | * 48 | * @return An iterator to the first element of the HOTIndex. 49 | */ 50 | inline auto HOTBegin(const HOTIndex &index) -> HOTIter { 51 | // Returns an iterator to the first element of the HOTIndex. 
52 | return index.begin(); 53 | } 54 | 55 | /** 56 | * @brief Insert a key-value pair into the HOTIndex. 57 | * 58 | * @param index The HOTIndex object to be inserted into. 59 | * @param k The key to be inserted. 60 | * @param v The value to be inserted. 61 | * 62 | * @return true if the insertion is successful, false if the key already 63 | * exists. 64 | */ 65 | inline bool HOTInsert(HOTIndex &index, const str k, const uint64_t v) { 66 | // Try to insert the key-value pair into the HOTIndex. 67 | return index.insert(ST_kv(k, v)); 68 | } 69 | 70 | /** 71 | * @brief Insert a key-value pair into the HOTIndex. 72 | * 73 | * @param index The HOTIndex object to be inserted into. 74 | * @param _kv The key-value pair to be inserted. 75 | * 76 | * @return true if the insertion is successful, false if the key already 77 | * exists. 78 | */ 79 | inline bool HOTInsert(HOTIndex &index, kv *_kv) { 80 | // Insert the key-value pair into the HOTIndex. 81 | return index.insert(ST_kv(_kv)); 82 | } 83 | 84 | /** 85 | * @brief Lookup a key in the HOTIndex. 86 | * 87 | * @param index The HOTIndex object to be searched. 88 | * @param k The key to be found. 89 | * 90 | * @return A pointer to the found key-value pair or NULL if not found. 91 | */ 92 | inline kv *HOTLookup(HOTIndex &index, const str k) { 93 | // Lookup the key in the HOTIndex. 94 | auto ret = index.lookup(k); 95 | 96 | // If the key is found, return a pointer to the key-value pair, 97 | // otherwise return NULL. 98 | return ret.mIsValid ? ret.mValue.getKV() : NULL; 99 | } 100 | 101 | /** 102 | * @brief Insert or update a key-value pair in the HOTIndex. 103 | * 104 | * @param index The HOTIndex object to be inserted or updated. 105 | * @param k The key to be inserted or updated. 106 | * @param v The value to be inserted or updated. 107 | * 108 | * @return A pointer to the inserted or updated key-value pair, or NULL if 109 | * insertion fails due to key already existing. 
110 | */ 111 | inline kv *HOTUpsert(HOTIndex &index, const str k, const uint64_t v) { 112 | auto res = index.upsert(ST_kv(k, v)); 113 | /// Forward what index.upsert() reports: the kv of the returned entry when the result is flagged valid, 114 | /// otherwise NULL. NOTE(review): presumably the entry that was replaced -- confirm against HOTSingleThreaded::upsert. 115 | return res.mIsValid ? res.mValue.getKV() : NULL; 116 | } 117 | /** @brief Remove key k from the HOTIndex. @return the result of index.remove(k). */ 118 | inline bool HOTRemove(HOTIndex &index, const str k) { return index.remove(k); } 119 | 120 | /** 121 | * @brief Bulkload a range of key-value pairs into the HOTIndex. 122 | * 123 | * This function inserts a range of key-value pairs into the HOTIndex in 124 | * bulk. The range of key-value pairs to be inserted is specified by the 125 | * half-open interval [l, r). 126 | * 127 | * @param index The HOTIndex object to be bulkloaded. 128 | * @param kvs A container of key-value pairs to be bulkloaded. 129 | * @param l The start index of the range of key-value pairs to be bulkloaded 130 | * (inclusive). 131 | * @param r The end index of the range of key-value pairs to be bulkloaded 132 | * (exclusive). 133 | */ 134 | template 135 | inline void HOTBulkload(HOTIndex &index, const record &kvs, const int l, 136 | const int r) { 137 | // No dedicated bulk path: every pair in [l, r) goes through its own index.insert() call.
138 | for (int i = l; i < r; ++i) { 139 | index.insert(ST_kv(kvs[i].k, kvs[i].v)); 140 | } 141 | } 142 | 143 | }; // namespace lits 144 | -------------------------------------------------------------------------------- /genId.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | /** @return true when `filename` could be opened for reading. Declared inline so that including this header from several translation units does not produce duplicate definitions. */ 10 | inline bool fileExists(const std::string &filename) { 11 | std::ifstream infile(filename); 12 | return infile.good(); 13 | } 14 | /** @return true when `keys` is strictly ascending, i.e. sorted and free of duplicates (an empty or one-element vector qualifies). Inline for the same header-safety reason as fileExists(). */ 15 | inline bool sortedAndUnique(const std::vector &keys) { 16 | for (std::size_t i = 1; i < keys.size(); ++i) { 17 | if (keys[i - 1] >= keys[i]) { 18 | return false; 19 | } 20 | } 21 | return true; 22 | } 23 | 24 | class IdGenerator { 25 | private: 26 | static const int ProvinceCodeCnt = 34; 27 | static const int CityCodeCnt = 80; 28 | static const int CountyCodeCnt = 70; 29 | static const int YearMin = 1949; 30 | static const int YearMax = 2024; 31 | static const int PoliceCodeCnt = 100; 32 | static const int GenderCodeCnt = 10; 33 | static const int CheckCodeCnt = 10; 34 | 35 | static const int ProvinceCodes[ProvinceCodeCnt]; 36 | 37 | static int getProvinceCode() { 38 | return ProvinceCodes[rand() % ProvinceCodeCnt]; 39 | } 40 | 41 | static int getCityCode() { return rand() % CityCodeCnt; } 42 | 43 | static int getCountyCode() { return rand() % CountyCodeCnt; } 44 | /** Year in [YearMin, YearMax): YearMax itself is never produced. */ 45 | static int getYearCode() { return YearMin + rand() % (YearMax - YearMin); } 46 | /** Encodes month * 100 + day; February is always limited to 28 days. */ 47 | static int getMonthDayCode() { 48 | int month = rand() % 12 + 1; 49 | switch (month) { 50 | case 4: 51 | case 6: 52 | case 9: 53 | case 11: 54 | return month * 100 + rand() % 30 + 1; 55 | case 2: 56 | return month * 100 + rand() % 28 + 1; 57 | default: 58 | return month * 100 + rand() % 31 + 1; 59 | } 60 | } 61 | 62 | static int getPoliceCode() { return rand() % PoliceCodeCnt; } 63 | 64 | static int getGenderCode() { return rand() % GenderCodeCnt; } 65 | 66 | static int getCheckCode() {
return rand() % CheckCodeCnt; } 67 | 68 | public: 69 | static std::string getId() { 70 | std::string ret; 71 | ret += std::to_string(getProvinceCode()); 72 | 73 | int CityCode = getCityCode(); 74 | if (CityCode < 10) { 75 | ret += "0"; 76 | } 77 | ret += std::to_string(CityCode); 78 | 79 | int CountyCode = getCountyCode(); 80 | if (CountyCode < 10) { 81 | ret += "0"; 82 | } 83 | ret += std::to_string(CountyCode); 84 | 85 | ret += std::to_string(getYearCode()); 86 | 87 | int MonthDayCode = getMonthDayCode(); 88 | if (MonthDayCode < 1000) { 89 | ret += "0"; 90 | } 91 | ret += std::to_string(MonthDayCode); 92 | 93 | int PoliceCode = getPoliceCode(); 94 | if (PoliceCode < 10) { 95 | ret += "0"; 96 | } 97 | ret += std::to_string(PoliceCode); 98 | 99 | ret += std::to_string(getGenderCode()); 100 | ret += std::to_string(getCheckCode()); 101 | return ret; 102 | } 103 | 104 | static std::string getRandstr(int len = 20) { 105 | std::string ret; 106 | for (int i = 0; i < len; ++i) { 107 | ret.push_back(rand() % 26 + 'a'); 108 | } 109 | return ret; 110 | } 111 | 112 | static std::vector getKeys(int cnt, int type) { 113 | 114 | // The target file: Idcards.txt 115 | std::string filename = type == 0 ? "Idcards.txt" : "Randstr.txt"; 116 | 117 | // Return Idcards 118 | std::vector ids; 119 | 120 | if (fileExists(filename)) { 121 | std::cout << "Reading keys from " << filename << " ... " 122 | << std::endl; 123 | std::ifstream infile(filename); 124 | std::string line; 125 | ids.clear(); 126 | while (std::getline(infile, line)) { 127 | ids.push_back(line); 128 | } 129 | infile.close(); 130 | } else { 131 | std::cout << "File not found. Generating keys ... " << std::endl; 132 | 133 | // Generate ids and store them in Idcards.txts 134 | std::set idSet; 135 | while (idSet.size() < cnt) { 136 | std::string id = type == 0 ? 
getId() : getRandstr(); 137 | idSet.insert(id); 138 | } 139 | ids.assign(idSet.begin(), idSet.end()); 140 | std::sort(ids.begin(), ids.end()); // Sort the IDs 141 | std::cout << cnt << " Keys Generated in " << filename << std::endl; 142 | 143 | // Persist the generated keys in the target file so that later runs reuse them 144 | std::ofstream outFile(filename); 145 | if (!outFile.is_open()) { 146 | std::cerr << "Unable to open file: " << filename << std::endl; 147 | exit(1); // non-zero status: the key file could not be written, so do not report success 148 | } 149 | 150 | for (const auto &str : ids) { 151 | outFile << str << '\n'; // no per-line flush; close() below flushes once 152 | } 153 | 154 | outFile.close(); 155 | } 156 | 157 | return ids; 158 | } 159 | }; 160 | 161 | const int IdGenerator::ProvinceCodes[IdGenerator::ProvinceCodeCnt] = { 162 | 11, 12, 13, 14, 15, 21, 22, 23, 31, 32, 33, 34, 35, 36, 37, 41, 42, 163 | 43, 44, 45, 46, 50, 51, 52, 53, 54, 61, 62, 63, 64, 65, 71, 81, 82}; 164 | -------------------------------------------------------------------------------- /lits/hot_src/HOTSingleThreadedNodeBaseInterface.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __HOT__SINGLE_THREADED__HOT_SINGLE_THREADED_NODE_BASE_INTERFACE__ 2 | #define __HOT__SINGLE_THREADED__HOT_SINGLE_THREADED_NODE_BASE_INTERFACE__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "include/NodeAllocationInformation.hpp" 9 | 10 | #include "HOTSingleThreadedChildPointerInterface.hpp" 11 | #include "MemoryPool.hpp" 12 | 13 | namespace hot { namespace singlethreaded { 14 | 15 | constexpr size_t SIMD_COB_TRIE_NODE_ALIGNMENT = 8; 16 | constexpr size_t MAXIMUM_NODE_SIZE_IN_LONGS = 60u; 17 | 18 | struct alignas(SIMD_COB_TRIE_NODE_ALIGNMENT) HOTSingleThreadedNodeBase { 19 | using const_iterator = HOTSingleThreadedChildPointer const *; 20 | using iterator = HOTSingleThreadedChildPointer *; 21 | 22 | friend class HOTSingleThreadedChildPointer; 23 | 24 | public: 25 | /** 26 | * points to the first child pointer contained in this node 27 | */ 28 | HOTSingleThreadedChildPointer* mFirstChildPointer; 29 | 
30 | /** 31 | * a mask with one bit set for every used entry. 32 | * Smaller indexes correspond to less significant bits. 33 | * 34 | * e.g. 35 | * If entry with index 0 is used the least significant bit is set. 36 | * If entry with index 31 is used the most significant bit is set. 37 | */ 38 | uint32_t mUsedEntriesMask; 39 | 40 | protected: 41 | 42 | public: 43 | /** 44 | * the height of this node. The height of a node is defined by the height of its subtree. 45 | * A leaf node therefore has height 1, its parent height 2 and so forth. 46 | */ 47 | uint16_t const mHeight; 48 | 49 | protected: 50 | inline static MemoryPool* getMemoryPool(); 51 | 52 | inline HOTSingleThreadedNodeBase(uint16_t const level, hot::commons::NodeAllocationInformation const & allocationInformation); 53 | inline void operator delete (void * rawMemory) = delete; 54 | 55 | /** 56 | * determines the child pointer corresponding to a result mask. 57 | * A result mask has a bit set for every potential result. 58 | * To determine the actual results the resultMask is intersected with the used entries mask and the index 59 | * of the most significant bit corresponds to the result index. 60 | * Finally the child pointer positioned at this index is returned as the result candidate 61 | * 62 | * @param resultMask the mask for potential results. 63 | * @return the actual result candidate. 64 | */ 65 | inline HOTSingleThreadedChildPointer const * toResult( uint32_t resultMask) const; 66 | 67 | /** 68 | * determines the child pointer corresponding to a result mask (non-const overload). 69 | * A result mask has a bit set for every potential result. 70 | * To determine the actual results the resultMask is intersected with the used entries mask and the index 71 | * of the most significant bit corresponds to the result index. 72 | * Finally the child pointer positioned at this index is returned as the result candidate 73 | * 74 | * @param resultMask the mask for potential results.
75 | * @return the actual result candidate. 76 | */ 77 | inline HOTSingleThreadedChildPointer* toResult( uint32_t resultMask); 78 | 79 | /** 80 | * Determines the index of the actual result candidate. 81 | * Therefore the resultMask is first restricted to the actual used entries and then the 82 | * index corresponding to the most significant set bit is returned as the index of the actual result candidate 83 | * 84 | * @param resultMask the mask having bits set for all potential result candidates 85 | * @return the index of the actual result candidate 86 | */ 87 | inline unsigned int toResultIndex( uint32_t resultMask ) const; 88 | 89 | public: 90 | /** 91 | * @return the total number of allocations executed on the underlying memory pool 92 | */ 93 | static inline size_t getNumberAllocations(); 94 | 95 | /** 96 | * @return the number of entries stored in this node 97 | */ 98 | inline size_t getNumberEntries() const; 99 | 100 | /** 101 | * @return whether the number of entries in this node corresponds to the maximum node fanout. For HOTSingleThreaded the maximum node fanout is 32. 102 | */ 103 | inline bool isFull() const; 104 | 105 | /** 106 | * @return the pointer to the first child pointer stored in this node. All other child pointers are stored sequentially following the first child pointer. 107 | */ 108 | inline HOTSingleThreadedChildPointer * getPointers(); 109 | 110 | /** 111 | * @return the pointer to the first child pointer stored in this node. All other child pointers are stored sequentially following the first child pointer. 
112 | */ 113 | inline HOTSingleThreadedChildPointer const * getPointers() const; 114 | 115 | /** 116 | * @return an iterator pointing to the first entry stored in this node 117 | */ 118 | inline iterator begin(); 119 | 120 | /** 121 | * @return an iterator pointing to the first element after the last entry stored in this node 122 | */ 123 | inline iterator end(); 124 | 125 | /** 126 | * @return an iterator pointing to the first entry stored in this node 127 | */ 128 | inline const_iterator begin() const; 129 | 130 | /** 131 | * @return an iterator pointing to the first element after the last entry stored in this node 132 | */ 133 | inline const_iterator end() const; 134 | }; 135 | 136 | } } 137 | 138 | #endif 139 | -------------------------------------------------------------------------------- /lits/hot_src/HOTSingleThreadedIterator.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __HOT__SINGLE_THREADED__HOT_SINGLE_THREADED_ITERATOR__ 2 | #define __HOT__SINGLE_THREADED__HOT_SINGLE_THREADED_ITERATOR__ 3 | 4 | #include 5 | #include 6 | 7 | #include "HOTSingleThreadedChildPointer.hpp" 8 | #include "HOTSingleThreadedNodeBase.hpp" 9 | #include "include/TidConverters.hpp" 10 | 11 | namespace hot { 12 | namespace singlethreaded { 13 | 14 | class HOTSingleThreadedIteratorStackEntry { 15 | HOTSingleThreadedChildPointer const* mCurrent; 16 | HOTSingleThreadedChildPointer const* mEnd; 17 | 18 | public: 19 | // leaf uninitialized 20 | HOTSingleThreadedIteratorStackEntry() {} 21 | 22 | HOTSingleThreadedChildPointer const* 23 | init(HOTSingleThreadedChildPointer const* current, 24 | HOTSingleThreadedChildPointer const* end) { 25 | mCurrent = current; 26 | mEnd = end; 27 | 28 | return mCurrent; 29 | } 30 | 31 | HOTSingleThreadedChildPointer const* getCurrent() const { return mCurrent; } 32 | 33 | bool isExhausted() { return mCurrent == mEnd; } 34 | 35 | void advance() { 36 | if (mCurrent != mEnd) { 37 | ++mCurrent; 38 | } 39 | } 
40 | }; 41 | 42 | template typename KeyExtractor> 43 | class HOTSingleThreaded; // Forward Declaration of SIMDCobTrie for usage as 44 | // friend class 45 | 46 | template class HOTSingleThreadedIterator { 47 | template typename KeyExtractor> 48 | friend class hot::singlethreaded::HOTSingleThreaded; 49 | 50 | static HOTSingleThreadedChildPointer END_TOKEN; 51 | 52 | alignas(std::alignment_of< 53 | HOTSingleThreadedIteratorStackEntry>()) char mRawNodeStack 54 | [sizeof(HOTSingleThreadedIteratorStackEntry) * 64]; 55 | HOTSingleThreadedIteratorStackEntry* mNodeStack; 56 | size_t mCurrentDepth = 0; 57 | 58 | public: 59 | HOTSingleThreadedIterator(HOTSingleThreadedChildPointer const* mSubTreeRoot) 60 | : HOTSingleThreadedIterator(mSubTreeRoot, mSubTreeRoot + 1) { 61 | descend(); 62 | } 63 | 64 | HOTSingleThreadedIterator(HOTSingleThreadedIterator const& other) 65 | : mNodeStack(reinterpret_cast( 66 | mRawNodeStack)) { 67 | std::memcpy(this->mRawNodeStack, other.mRawNodeStack, 68 | sizeof(HOTSingleThreadedIteratorStackEntry) * 69 | (other.mCurrentDepth + 1)); 70 | mCurrentDepth = other.mCurrentDepth; 71 | } 72 | 73 | HOTSingleThreadedIterator() 74 | : mNodeStack(reinterpret_cast( 75 | mRawNodeStack)) { 76 | 77 | mNodeStack[0].init(&END_TOKEN, &END_TOKEN); 78 | } 79 | 80 | public: 81 | ValueType operator*() const { 82 | return idx::contenthelpers::tidToValue( 83 | mNodeStack[mCurrentDepth].getCurrent()->getTid()); 84 | } 85 | 86 | HOTSingleThreadedIterator& operator++() { 87 | mNodeStack[mCurrentDepth].advance(); 88 | while ((mCurrentDepth > 0) & 89 | (mNodeStack[mCurrentDepth].isExhausted())) { 90 | --mCurrentDepth; 91 | mNodeStack[mCurrentDepth].advance(); 92 | } 93 | if (mNodeStack[0].isExhausted()) { 94 | mNodeStack[0].init(&END_TOKEN, &END_TOKEN); 95 | } else { 96 | descend(); 97 | } 98 | return *this; 99 | } 100 | 101 | bool operator==(HOTSingleThreadedIterator const& other) const { 102 | return (*mNodeStack[mCurrentDepth].getCurrent()) == 103 | 
(*other.mNodeStack[other.mCurrentDepth].getCurrent()); 104 | } 105 | 106 | bool operator!=(HOTSingleThreadedIterator const& other) const { 107 | return (*mNodeStack[mCurrentDepth].getCurrent()) != 108 | (*other.mNodeStack[other.mCurrentDepth].getCurrent()); 109 | } 110 | 111 | private: 112 | HOTSingleThreadedIterator(HOTSingleThreadedChildPointer const* currentRoot, 113 | HOTSingleThreadedChildPointer const* rootEnd) 114 | : mNodeStack(reinterpret_cast( 115 | mRawNodeStack)) { 116 | mNodeStack[0].init(currentRoot, rootEnd); 117 | } 118 | 119 | void descend() { 120 | HOTSingleThreadedChildPointer const* currentSubtreeRoot = 121 | mNodeStack[mCurrentDepth].getCurrent(); 122 | while (currentSubtreeRoot->isAValidNode()) { 123 | HOTSingleThreadedNodeBase* currentSubtreeRootNode = 124 | currentSubtreeRoot->getNode(); 125 | currentSubtreeRoot = descend(currentSubtreeRootNode->begin(), 126 | currentSubtreeRootNode->end()); 127 | } 128 | } 129 | 130 | HOTSingleThreadedChildPointer const* 131 | descend(HOTSingleThreadedChildPointer const* current, 132 | HOTSingleThreadedChildPointer const* end) { 133 | return mNodeStack[++mCurrentDepth].init(current, end); 134 | } 135 | }; 136 | 137 | template 138 | HOTSingleThreadedChildPointer HOTSingleThreadedIterator::END_TOKEN{}; 139 | 140 | } // namespace singlethreaded 141 | } // namespace hot 142 | 143 | #endif -------------------------------------------------------------------------------- /lits/hot_src/include/NodeMergeInformation.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __HOT__COMMONS__NODE_MERGE_INFORMATION__ 2 | #define __HOT__COMMONS__NODE_MERGE_INFORMATION__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | #include "OptionalValue.hpp" 11 | 12 | #include "DiscriminativeBit.hpp" 13 | #include "BitMask32.hpp" 14 | #include "MultiMaskPartialKeyMapping.hpp" 15 | 16 | 17 | namespace hot { namespace commons { 18 | 19 | class NodeMergeInformation { 20 | 
std::array mKeyWithOnlyBitForLeftSourceInformation; 21 | std::array mKeyWithOnlyBitForRightSourceInformation; 22 | bool mHasMergedMask; 23 | alignas(std::alignment_of>::value) std::array)> mRawMergedMask; 24 | 25 | public: 26 | /** 27 | * 28 | * @tparam SourceDiscriminativeBitsRepresentation1 29 | * @tparam SourcePartialKeyType1 30 | * @tparam SourceDiscriminativeBitsRepresentation1 31 | * @tparam SourcePartialKeyType2 32 | * @param left 33 | * @param right 34 | */ 35 | template NodeMergeInformation( 36 | uint16_t rootBit, SourceDiscriminativeBitsRepresentation1 const & left, SourceDiscriminativeBitsRepresentation2 const & right 37 | ) : mKeyWithOnlyBitForLeftSourceInformation(left.createIntermediateKeyWithOnlySignificantBitsSet()), 38 | mKeyWithOnlyBitForRightSourceInformation(right.createIntermediateKeyWithOnlySignificantBitsSet()), 39 | mHasMergedMask(false) 40 | { 41 | initializeMergedMask(rootBit, mKeyWithOnlyBitForLeftSourceInformation, mKeyWithOnlyBitForRightSourceInformation); 42 | }; 43 | 44 | template inline auto executeWithMergedDiscriminativeBitsRepresentationAndFittingPartialKeyType(Operation const & operation) const { 45 | assert(isValid()); 46 | MultiMaskPartialKeyMapping<4> const & temporaryExtractionMask = *reinterpret_cast const *>(mRawMergedMask.data()); 47 | 48 | decltype(operation(temporaryExtractionMask, static_cast(0u), static_cast(0u), static_cast(0u))) result; 49 | temporaryExtractionMask.executeWithCompressedDiscriminativeBitsRepresentation([&](auto const & extractionInformation) { 50 | extractionInformation.executeWithCorrectMaskAndDiscriminativeBitsRepresentation([&](auto const & mergedDiscriminativeBitsRepresentation, auto maximumMask) { 51 | uint32_t leftRecodingMask = mergedDiscriminativeBitsRepresentation.extractMask(mKeyWithOnlyBitForLeftSourceInformation.data()); 52 | uint32_t rightRecodingMask = mergedDiscriminativeBitsRepresentation.extractMask(mKeyWithOnlyBitForRightSourceInformation.data()); 53 | 54 | result = 
operation(mergedDiscriminativeBitsRepresentation, maximumMask, leftRecodingMask, rightRecodingMask); 55 | }); 56 | }); 57 | return result; 58 | } 59 | 60 | bool isValid() const { 61 | return mHasMergedMask; 62 | } 63 | 64 | private: 65 | void initializeMergedMask(uint16_t rootBit, std::array const & first, std::array const & second) { 66 | //AVX-512 ? 67 | alignas(8) std::array bytePositions; 68 | alignas(8) std::array byteMasks; 69 | alignas(8) std::array result; 70 | 71 | std::fill(byteMasks.begin(), byteMasks.end(), 0); 72 | std::fill(bytePositions.begin(), bytePositions.end(), 0); 73 | 74 | uint8_t* rawBytePosition = reinterpret_cast(bytePositions.data()); 75 | uint8_t* rawByteMasks = reinterpret_cast(byteMasks.data()); 76 | 77 | uint16_t nextBytePositionToUse = 0u; 78 | 79 | 80 | __m256i zero = _mm256_setzero_si256(); 81 | 82 | for(size_t i=0; i < 256; i+= sizeof(__m256i)) { 83 | __m256i firstPortion = _mm256_loadu_si256((__m256i const *) (first.data() + i)); 84 | __m256i secondPortion = _mm256_loadu_si256((__m256i const *) (second.data() + i)); 85 | __m256i bothBitsSet = _mm256_or_si256(firstPortion, secondPortion); 86 | 87 | BitMask32 usedByteIndexes(~(_mm256_movemask_epi8(_mm256_cmpeq_epi8(zero, bothBitsSet)))); 88 | 89 | _mm256_storeu_si256(reinterpret_cast<__m256i*>(result.data() + i), _mm256_or_si256(firstPortion, secondPortion)); 90 | //iterate over potentially set bytes 91 | for(uint32_t relativeUsedBytesIndex : usedByteIndexes) { 92 | size_t absoluteUsedBytesIndex = i + relativeUsedBytesIndex; 93 | uint8_t currentByteMask = result[absoluteUsedBytesIndex]; 94 | //handle first byte 95 | if(nextBytePositionToUse == 0) { 96 | unsigned int rootByteIndex = getByteIndex(rootBit); 97 | uint8_t rootByteMask = DiscriminativeBit(rootBit, 1).getExtractionByte(); 98 | if(rootByteIndex < absoluteUsedBytesIndex) { 99 | rawBytePosition[0] = rootByteIndex; 100 | rawByteMasks[0] = rootByteMask; 101 | nextBytePositionToUse = 1; 102 | } else { 103 | currentByteMask |= 
rootByteMask; 104 | } 105 | } 106 | 107 | //handle overflow 108 | if(nextBytePositionToUse == 32) { 109 | return; //no optional value 110 | } 111 | rawBytePosition[nextBytePositionToUse] = absoluteUsedBytesIndex; 112 | rawByteMasks[nextBytePositionToUse] = currentByteMask; 113 | ++nextBytePositionToUse; 114 | } 115 | } 116 | 117 | uint16_t numberDiscriminativeBits = popcount(byteMasks); 118 | if(numberDiscriminativeBits <= 32) { 119 | mHasMergedMask = true; 120 | new (mRawMergedMask.data()) MultiMaskPartialKeyMapping<4>( 121 | nextBytePositionToUse, numberDiscriminativeBits, bytePositions, byteMasks 122 | ); 123 | } else { 124 | assert(false); 125 | std::cout << "HERE MAY BE AN ERROR " << std::endl; 126 | } 127 | } 128 | 129 | static size_t popcount(std::array const & rawByteMasks) { 130 | return _mm_popcnt_u64(rawByteMasks[0]) 131 | + _mm_popcnt_u64(rawByteMasks[1]) 132 | + _mm_popcnt_u64(rawByteMasks[2]) 133 | + _mm_popcnt_u64(rawByteMasks[3]); 134 | } 135 | 136 | }; 137 | 138 | }} 139 | 140 | #endif -------------------------------------------------------------------------------- /example.cpp: -------------------------------------------------------------------------------- 1 | #include "lits/lits.hpp" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define RESET "\033[0m" 10 | #define RED "\033[31m" 11 | #define GREEN "\033[32m" 12 | #define YELLOW "\033[33m" 13 | #define BLUE "\033[34m" 14 | 15 | std::vector strings; 16 | 17 | char *raw_data; 18 | int num_of_keys; 19 | char **keys; 20 | uint64_t *vals; 21 | 22 | void readWords() { 23 | std::ifstream file("words.txt"); 24 | if (!file) { 25 | std::cerr << "Fail to open words.txt" << std::endl; 26 | return; 27 | } 28 | std::string line; 29 | while (std::getline(file, line)) { 30 | strings.push_back(line); 31 | } 32 | file.close(); 33 | } 34 | 35 | void prepareData() { 36 | readWords(); 37 | uint64_t byte_size = 0, ofs = 0; 38 | for (int i = 0; i < strings.size(); ++i) { 39 | byte_size 
+= strings[i].length() + 1; 40 | } 41 | std::cout << "Read " << strings.size() << " Keys from words.txt, " 42 | << byte_size << " Bytes at total." << std::endl; 43 | num_of_keys = strings.size(); 44 | 45 | raw_data = new char[byte_size]; 46 | keys = new char *[num_of_keys]; 47 | vals = new uint64_t[num_of_keys]; 48 | 49 | for (int i = 0; i < strings.size(); ++i) { 50 | keys[i] = raw_data + ofs; 51 | vals[i] = i + 1; 52 | memcpy(raw_data + ofs, strings[i].c_str(), strings[i].length() + 1); 53 | ofs += strings[i].length() + 1; 54 | } 55 | RT_ASSERT(ofs == byte_size); 56 | } 57 | 58 | void freeData() { 59 | strings.clear(); 60 | delete[] raw_data; 61 | delete[] keys; 62 | delete[] vals; 63 | } 64 | 65 | void exampleMain() { 66 | lits::LITS index; 67 | index.bulkload((const char **)(keys), (const uint64_t *)(vals), 68 | num_of_keys); 69 | 70 | std::string word1 = "internation"; 71 | std::string word2 = "internal"; 72 | std::string word3 = "intern"; 73 | uint64_t value1 = 123, value2 = 789; 74 | int scan_range = 6; 75 | 76 | //=====[Example 1: Lookup]============================================== 77 | std::cout << "[Example 1][Lookup]: Try to search (" << YELLOW << word1 78 | << RESET << ") in the index ... "; 79 | auto result1 = index.lookup(word1.c_str()); 80 | std::cout << RED << (result1 ? "found" : "not found") << RESET; 81 | if (result1) { 82 | std::cout << ", the value is " << result1->read() << std::endl; 83 | } else { 84 | std::cout << std::endl; 85 | } 86 | 87 | //=====[Example 2: Insert]============================================== 88 | std::cout << "[Example 2][Insert]: Try to insert (" << YELLOW << word1 89 | << RESET << ", " << BLUE << value1 << RESET 90 | << ") into the index ... "; 91 | bool result2 = index.insert(word1.c_str(), value1); 92 | std::cout << GREEN << (result2 ? 
"success" : "fail") << RESET << std::endl; 93 | 94 | //=====[Example 3: Lookup]============================================== 95 | std::cout << "[Example 3][Lookup]: Try to search (" << YELLOW << word1 96 | << RESET << ") in the index ... "; 97 | auto result3 = index.lookup(word1.c_str()); 98 | std::cout << GREEN << (result3 ? "found" : "not found") << RESET; 99 | if (result3) { 100 | std::cout << ", the value is " << BLUE << result3->read() << RESET 101 | << std::endl; 102 | } else { 103 | std::cout << std::endl; 104 | } 105 | 106 | //=====[Example 4: Upsert]============================================== 107 | std::cout << "[Example 4][Upsert]: Try to upsert (" << YELLOW << word1 108 | << RESET << ", " << BLUE << value2 << RESET 109 | << ") into the index ... "; 110 | auto result4 = index.upsert(word1.c_str(), value2); 111 | if (result4) { 112 | std::cout << "the value: (" << BLUE << result4 << RESET << ") -> (" 113 | << BLUE << value2 << RESET << ")" << std::endl; 114 | } else { 115 | std::cout << "the value: (NULL) -> (" << BLUE << value2 << RESET << ")" 116 | << std::endl; 117 | } 118 | 119 | //=====[Example 5: Lookup]============================================== 120 | std::cout << "[Example 5][Lookup]: Try to search (" << YELLOW << word1 121 | << RESET << ") in the index ... "; 122 | auto result5 = index.lookup(word1.c_str()); 123 | std::cout << GREEN << (result5 ? "found" : "not found") << RESET; 124 | if (result5) { 125 | std::cout << ", the value is " << BLUE << result5->read() << RESET 126 | << std::endl; 127 | } else { 128 | std::cout << std::endl; 129 | } 130 | 131 | //=====[Example 6: Delete]============================================== 132 | std::cout << "[Example 6][Delete]: Try to delete (" << YELLOW << word2 133 | << RESET << ") in the index ... "; 134 | auto result6 = index.remove(word2.c_str()); 135 | std::cout << GREEN << (result6 ? 
"success" : "fail") << RESET << std::endl; 136 | 137 | //=====[Example 7: Scan]============================================== 138 | std::cout << "[Example 7][Scan]: Try to find (" << YELLOW << word3 << RESET 139 | << ") in the index ... "; 140 | auto result7 = index.find(word3.c_str()); 141 | if (result7.valid()) { 142 | std::cout << GREEN << "found" << RESET << ", do a range 6 scan" 143 | << std::endl; 144 | for (int i = 0; i < scan_range && result7.not_finish(); ++i) { 145 | auto _kv = result7.getKV(); 146 | std::cout << "[Example 7][Scan]: (" << YELLOW << _kv->k << RESET 147 | << ", " << BLUE << _kv->read() << RESET << ")" 148 | << std::endl; 149 | result7.next(); 150 | } 151 | } else { 152 | std::cout << RED << "not found" << RESET << std::endl; 153 | } 154 | 155 | index.destroy(); 156 | } 157 | 158 | int main() { 159 | prepareData(); 160 | exampleMain(); 161 | freeData(); 162 | } -------------------------------------------------------------------------------- /lits/lits_model.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "lits_base.hpp" 4 | 5 | #include 6 | #include 7 | 8 | namespace lits { 9 | 10 | /** 11 | * Hash-enhanced Prefix Table 12 | */ 13 | class HPT { 14 | public: 15 | // Attenuation factor, should be in (0, 1] 16 | static constexpr double AF = 0.5; 17 | 18 | // Position hash bits length 19 | static constexpr int PS_HASH_LEN = 5; 20 | 21 | // Front char hash bits length 22 | static constexpr int FC_HASH_LEN = 5; 23 | 24 | // Position hash mask and front char hash mask 25 | static constexpr uint32_t PS_MASK = (1 << PS_HASH_LEN) - 1; 26 | static constexpr uint32_t FC_MASK = (1 << FC_HASH_LEN) - 1; 27 | 28 | // Position array length and front char array length 29 | static constexpr uint32_t PS_SZ = PS_MASK + 1; 30 | static constexpr uint32_t FC_SZ = FC_MASK + 1; 31 | 32 | // Units in a table line 33 | class UNI { 34 | public: 35 | double CDF; // Cumulative Distribution Function 36 
| double PRO; // Per-character share of its table line 37 | 38 | public: 39 | UNI() : CDF(0), PRO(0) {} 40 | ~UNI() = default; 41 | }; 42 | 43 | // Hash-enhanced Prefix Table 44 | UNI *m[PS_SZ][FC_SZ]; 45 | 46 | public: 47 | HPT() { 48 | // The table cannot be too large! 49 | // ST_ASSERT((PS_HASH_LEN + FC_HASH_LEN) <= 12); 50 | 51 | for (int i = 0; i < PS_SZ; ++i) { 52 | for (int j = 0; j < FC_SZ; ++j) { 53 | m[i][j] = new UNI[MAX_CH]; 54 | } 55 | } 56 | } 57 | 58 | ~HPT() { destroy(); }; 59 | /** Free every table line. Safe to call more than once: each pointer is reset after deletion, so the destroy() issued by the destructor after an explicit destroy() only sees null pointers instead of freeing the same arrays twice. The table must not be used after this call. */ 60 | void destroy() { 61 | for (int i = 0; i < PS_SZ; ++i) { 62 | for (int j = 0; j < FC_SZ; ++j) { 63 | delete[] m[i][j]; m[i][j] = nullptr; 64 | } 65 | } 66 | } 67 | 68 | /** 69 | * Return the byte size of a UNI. 70 | */ 71 | size_t unit_size() { return sizeof(UNI); } 72 | 73 | /** 74 | * Return the byte size of the model. 75 | */ 76 | size_t model_size() { return sizeof(UNI) * PS_SZ * FC_SZ * MAX_CH; } 77 | 78 | /** 79 | * Train the HPT. 80 | * @param keys The input keys. 81 | * @param len The input data size. 82 | * 83 | * @return true when success, false otherwise.
84 | */ 85 | bool train(const str *keys, const int len) { 86 | // Variables 87 | double this_line_wgt; 88 | double weight[256]; 89 | unsigned char dst_ch; 90 | if (keys == nullptr || len <= 0) return false; /* nothing to train on; keys[0] and keys[len - 1] below would be out of range */ 91 | // Global common prefix length 92 | const uint8_t gcpl = ucpl(keys[0], keys[len - 1]); 93 | 94 | // Init the weight 95 | weight[0] = 1; 96 | for (int i = 1; i < 256; ++i) { 97 | weight[i] = weight[i - 1] * AF; 98 | } 99 | 100 | // Recording the pairs 101 | for (int i = 0; i < len; ++i) { 102 | // We only consider the distinguishing prefix 103 | int max_len = 0; 104 | 105 | if (len == 1) max_len = 1; /* a lone key has no neighbour, so keys[1] must not be read */ else if (i == 0) 106 | max_len = ucpl(keys[0], keys[1]) + 1; 107 | else if (i == len - 1) 108 | max_len = ucpl(keys[len - 1], keys[len - 2]) + 1; 109 | else 110 | max_len = std::max(ucpl(keys[i], keys[i - 1]), 111 | ucpl(keys[i], keys[i + 1])) + 112 | 1; 113 | 114 | // Record the occurrence frequency in table; byte 0 has no preceding char and uses front-char row 0 115 | for (int b = gcpl; b < std::min(ustrlen(keys[i]), max_len); 116 | ++b) { 117 | dst_ch = keys[i][b]; 118 | int _ps = b & PS_MASK; 119 | int _fc = b == 0 ? 0 : (keys[i][b - 1] & FC_MASK); 120 | m[_ps][_fc][dst_ch].CDF += weight[b - gcpl]; 121 | } 122 | } 123 | 124 | // Turn the accumulated weights of every table line into PRO (normalised share) and CDF (exclusive prefix sum of the shares); lines without any weight are left untouched 125 | for (int x = 0; x < PS_SZ; ++x) { 126 | for (int y = 0; y < FC_SZ; ++y) { 127 | this_line_wgt = 0; 128 | for (int j = 0; j < MAX_CH; ++j) { 129 | this_line_wgt += m[x][y][j].CDF; 130 | } 131 | if (this_line_wgt <= 0) 132 | continue; 133 | for (int j = 0; j < MAX_CH; ++j) { 134 | m[x][y][j].CDF /= this_line_wgt; 135 | m[x][y][j].PRO = m[x][y][j].CDF; 136 | } 137 | double sum = m[x][y][0].CDF; 138 | m[x][y][0].CDF = 0; 139 | for (int j = 1; j < MAX_CH; ++j) { 140 | double tmp = m[x][y][j].CDF; 141 | m[x][y][j].CDF = sum; 142 | sum += tmp; 143 | } 144 | } 145 | } 146 | 147 | // With at least one key, training itself cannot fail 148 | return true; 149 | } 150 | 151 | /** 152 | * Get the position in the LITS node's item array. 153 | * 154 | * @param key The input key. 155 | * @param size The item array's length.
156 | * @param gcpl Index of the first key byte to evaluate; the common prefix 157 | * before it is skipped. Must be >= 1 because the loop reads 158 | * key[gcpl - 1]; use getPos_woGCPL() when nothing is skipped. 159 | * @param k The local linear model's slope. 160 | * @param b The local linear model's intercept. 161 | * 162 | * @return an integer which stands for key's position in the node array. 163 | */ 164 | inline int getPos(const str key, const int size, int gcpl, double k = 1, 165 | double b = 0) const { 166 | double ps = size * k; 167 | double c = size * b; 168 | // Walk the key from gcpl; stop at the terminator or once the remaining span ps is below one slot 169 | for (int i = gcpl; key[i] && ps >= 1; ++i) { 170 | const auto &uni = m[i & PS_MASK][key[i - 1] & FC_MASK][key[i]]; 171 | c += ps * uni.CDF; 172 | ps *= uni.PRO; 173 | } 174 | 175 | return static_cast(c); 176 | } 177 | /** Variant of getPos() without a skipped prefix: byte 0 has no preceding char and is looked up in row m[0][0]; the remaining bytes are handled as in getPos(). */ 178 | inline int getPos_woGCPL(const str key, const int size, double k = 1, 179 | double b = 0) const { 180 | double pro = size * k; 181 | double cdf = size * b; 182 | // First byte: fixed row m[0][0] 183 | const auto &uni = m[0][0][key[0]]; 184 | cdf += pro * uni.CDF; 185 | pro *= uni.PRO; 186 | 187 | for (int i = 1; key[i] && pro >= 1; ++i) { 188 | const auto &uni = m[i & PS_MASK][key[i - 1] & FC_MASK][key[i]]; 189 | cdf += pro * uni.CDF; 190 | pro *= uni.PRO; 191 | } 192 | 193 | return static_cast(cdf); 194 | } 195 | 196 | /** 197 | * Return a CDF value of key which is NOT processed by the local model. The loop reads key[gcpl - 1], so gcpl must be >= 1. 198 | */ 199 | inline double getCdf(const str key, int gcpl) const { 200 | double pro = 1; 201 | double cdf = 0; 202 | static constexpr double min_double = 1. 
/ (1UL << 52); 203 | for (int i = gcpl; key[i] && pro >= min_double; ++i) { 204 | const auto &uni = m[i & PS_MASK][key[i - 1] & FC_MASK][key[i]]; 205 | cdf += pro * uni.CDF; 206 | pro *= uni.PRO; 207 | } 208 | return cdf; 209 | } 210 | }; 211 | 212 | }; // namespace lits 213 | -------------------------------------------------------------------------------- /lits/hot_src/include/BitMask32.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __HOT__COMMONS__BIT_MASK_32__ 2 | #define __HOT__COMMONS__BIT_MASK_32__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace hot { namespace commons { 9 | 10 | constexpr uint32_t ALL_SLOTS_USED_MASK32 = 0xFFFFFFFF; 11 | constexpr uint32_t ALL_SLOTS_EMPTY_MASK32 = 0x00000000; 12 | 13 | constexpr uint8_t NO_SLOT_AVAILABLE32 = 0xFF; 14 | constexpr uint8_t TOTAL_NUMBER_SLOTS = 32; 15 | 16 | class BitMask32Iterator; 17 | 18 | class BitMask32 { 19 | 20 | private: 21 | uint32_t mUsedSlots; 22 | 23 | public: 24 | BitMask32(); 25 | BitMask32(uint32_t slots); 26 | BitMask32(int slots); 27 | 28 | static BitMask32 create(std::initializer_list slotsToReserve); 29 | 30 | inline uint8_t getAndAllocateSlot(); 31 | 32 | inline void reserveSlot(uint_fast8_t slotIndex); 33 | 34 | inline void removeSlot(const uint_fast8_t slotIndex); 35 | 36 | inline void removeEntries(const uint32_t maskForEntriesToRemove); 37 | 38 | inline void removeEntries(const BitMask32 maskForEntriesToRemove); 39 | 40 | inline uint32_t const & getMask() const; 41 | 42 | inline void setMask(uint32_t newMask); 43 | 44 | inline bool isSlotInUse(const uint_fast8_t slotIndex) const; 45 | 46 | inline void toggleSlot(const uint_fast8_t slotIndex, bool newValue); 47 | 48 | inline bool isEmpty() const; 49 | 50 | inline bool isFull() const; 51 | 52 | inline int getFirstUsedSlot() const; 53 | 54 | inline int consumeFirst(); 55 | 56 | inline int getNumberUsedSlots() const; 57 | 58 | inline BitMask32 &operator|=(BitMask32 const &other); 
59 | 60 | inline BitMask32 &addAll(BitMask32 const &other); 61 | 62 | inline BitMask32 &operator&=(BitMask32 const &other); 63 | 64 | inline BitMask32 operator|(BitMask32 const &other) const; 65 | 66 | inline BitMask32 operator&(BitMask32 const &other) const; 67 | 68 | inline bool operator==(BitMask32 const &other) const; 69 | 70 | inline bool operator!=(BitMask32 const &other) const; 71 | 72 | BitMask32Iterator begin() const; 73 | 74 | BitMask32Iterator end() const; 75 | }; 76 | 77 | class BitMask32Iterator : public std::iterator { 78 | 79 | private: 80 | BitMask32 mCopy; 81 | int mCurrent; 82 | 83 | void advance(); 84 | 85 | public: 86 | BitMask32Iterator(); 87 | BitMask32Iterator(BitMask32 const &original); 88 | 89 | int operator*() const; 90 | int* operator->() const; 91 | 92 | //prefix operator 93 | BitMask32Iterator& operator++(); 94 | BitMask32Iterator operator++(int); 95 | inline bool operator==(BitMask32Iterator const &rhs) const; 96 | inline bool operator!=(BitMask32Iterator const &rhs) const; 97 | 98 | }; 99 | 100 | }} 101 | 102 | namespace hot { namespace commons { 103 | 104 | inline BitMask32::BitMask32() : mUsedSlots(0) { 105 | } 106 | 107 | inline BitMask32::BitMask32(uint32_t slots) : mUsedSlots(slots) { 108 | } 109 | 110 | inline BitMask32::BitMask32(int slots) : mUsedSlots(slots) { 111 | } 112 | 113 | inline BitMask32 BitMask32::create(std::initializer_list slotsToReserve) { 114 | BitMask32 slots; 115 | for (uint8_t slot : slotsToReserve) { 116 | slots.reserveSlot(slot); 117 | } 118 | return slots; 119 | } 120 | 121 | inline uint8_t BitMask32::getAndAllocateSlot() { 122 | if (mUsedSlots == ALL_SLOTS_USED_MASK32) { 123 | return NO_SLOT_AVAILABLE32; 124 | } 125 | 126 | const uint8_t slotIndex = __builtin_ffs(~mUsedSlots) - 1; 127 | mUsedSlots |= (1 << slotIndex); 128 | return slotIndex; 129 | } 130 | 131 | inline void BitMask32::reserveSlot(uint_fast8_t slotIndex) { 132 | mUsedSlots |= (1 << slotIndex); 133 | } 134 | 135 | inline void 
BitMask32::removeSlot(const uint_fast8_t slotIndex) { 136 | mUsedSlots &= ~(1 << slotIndex); 137 | } 138 | 139 | inline void BitMask32::removeEntries(const uint32_t maskForEntriesToRemove) { 140 | mUsedSlots &= ~maskForEntriesToRemove; 141 | } 142 | 143 | inline void BitMask32::removeEntries(const BitMask32 maskForEntriesToRemove) { 144 | removeEntries(maskForEntriesToRemove.getMask()); 145 | } 146 | 147 | inline uint32_t const & BitMask32::getMask() const { 148 | return mUsedSlots; 149 | } 150 | 151 | inline void BitMask32::setMask(uint32_t newMask) { 152 | mUsedSlots = newMask; 153 | } 154 | 155 | inline bool BitMask32::isSlotInUse(const uint_fast8_t slotIndex) const { 156 | return mUsedSlots & (1 << slotIndex); 157 | } 158 | 159 | inline void BitMask32::toggleSlot(const uint_fast8_t slotIndex, bool newValue) { 160 | uint32_t deleteMask = (~(1 << slotIndex)); 161 | uint32_t updateMask = (newValue << slotIndex); 162 | mUsedSlots = (mUsedSlots & deleteMask) | updateMask; 163 | } 164 | 165 | inline bool BitMask32::isEmpty() const { 166 | return mUsedSlots == ALL_SLOTS_EMPTY_MASK32; 167 | } 168 | 169 | inline bool BitMask32::isFull() const { 170 | return mUsedSlots == ALL_SLOTS_USED_MASK32; 171 | } 172 | 173 | inline int BitMask32::getFirstUsedSlot() const { 174 | return __builtin_ctz(mUsedSlots); 175 | } 176 | 177 | inline int BitMask32::consumeFirst() { 178 | const int firstSlot = __builtin_ctz(mUsedSlots); 179 | removeSlot(firstSlot); 180 | return firstSlot; 181 | } 182 | 183 | inline int BitMask32::getNumberUsedSlots() const { 184 | return __builtin_popcount(mUsedSlots); 185 | } 186 | 187 | inline BitMask32 & BitMask32::operator|=(BitMask32 const &other) { 188 | return addAll(other); 189 | } 190 | 191 | inline BitMask32 & BitMask32::addAll(BitMask32 const &other) { 192 | mUsedSlots |= other.mUsedSlots; 193 | return *this; 194 | } 195 | 196 | inline BitMask32 & BitMask32::operator&=(BitMask32 const &other) { 197 | mUsedSlots &= other.mUsedSlots; 198 | return 
*this; 199 | } 200 | 201 | inline BitMask32 BitMask32::operator|(BitMask32 const &other) const { 202 | return {static_cast(mUsedSlots | other.mUsedSlots)}; 203 | } 204 | 205 | inline BitMask32 BitMask32::operator&(BitMask32 const &other) const { 206 | return {static_cast(mUsedSlots & other.mUsedSlots)}; 207 | } 208 | 209 | inline bool BitMask32::operator==(BitMask32 const &other) const { 210 | return mUsedSlots == other.mUsedSlots; 211 | } 212 | 213 | inline bool BitMask32::operator!=(BitMask32 const &other) const { 214 | return mUsedSlots != other.mUsedSlots; 215 | } 216 | 217 | inline BitMask32Iterator BitMask32::begin() const { 218 | return {*this}; 219 | } 220 | 221 | const BitMask32Iterator DEFAULT_SLOTS_END_ITERATOR { }; 222 | 223 | inline BitMask32Iterator BitMask32::end() const { //Move to static end 224 | return DEFAULT_SLOTS_END_ITERATOR; 225 | } 226 | 227 | inline void BitMask32Iterator::advance() { 228 | mCopy.consumeFirst(); 229 | mCurrent = mCopy.getFirstUsedSlot(); 230 | } 231 | 232 | inline BitMask32Iterator::BitMask32Iterator() : mCopy {}, mCurrent {} { 233 | }; 234 | 235 | inline BitMask32Iterator::BitMask32Iterator(BitMask32 const & original) : mCopy { original }, mCurrent { original.getFirstUsedSlot() }{ 236 | } 237 | 238 | inline int BitMask32Iterator::operator*() const { 239 | return mCurrent; 240 | } 241 | 242 | inline int * BitMask32Iterator::operator->() const { 243 | return const_cast(&mCurrent); 244 | } 245 | 246 | 247 | //prefix operator 248 | inline BitMask32Iterator & BitMask32Iterator::operator++() { 249 | assert(!mCopy.isEmpty()); 250 | advance(); 251 | return *this; 252 | } 253 | 254 | inline BitMask32Iterator BitMask32Iterator::operator++ ( int ) { 255 | BitMask32Iterator original { mCopy }; 256 | advance(); 257 | return original; 258 | } 259 | 260 | inline bool BitMask32Iterator::operator==(BitMask32Iterator const & rhs) const { 261 | return mCopy == rhs.mCopy; 262 | } 263 | 264 | inline bool 
BitMask32Iterator::operator!=(BitMask32Iterator const & rhs) const { 265 | return mCopy != rhs.mCopy; 266 | } 267 | 268 | } } 269 | 270 | #endif -------------------------------------------------------------------------------- /lits/lits_kv.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "lits_base.hpp" 6 | #include "lits_utils.hpp" 7 | 8 | #define RAW_KV(x) ((kv *)PTR_RAW(x)) 9 | 10 | namespace lits { 11 | 12 | class kv { 13 | public: 14 | uint64_t v; // The value 15 | char k[]; // The key 16 | 17 | /** 18 | * Set the key and value of the kv_load. 19 | * 20 | * @param key The key to be set 21 | * @param val The value to be set 22 | */ 23 | void set(const str key, const uint64_t val) { 24 | // Copy the key to the memory of the kv_load 25 | memcpy(this->k, key, ustrlen(key) + 1); 26 | // Set the value of the kv_load 27 | this->v = val; 28 | } 29 | 30 | /** 31 | * Get the value of the kv_load. 32 | * 33 | * @return The value of the kv_load 34 | */ 35 | inline uint64_t read() const { 36 | // Return the value of the kv_load 37 | return v; 38 | } 39 | 40 | /** 41 | * Verify that the remain part of the string is equal to the given key. 42 | * 43 | * @param key The key to be verified. 44 | * @param ofs The offset of the key to be verified (default: 0). 45 | * 46 | * @return True if the remain part of the string is equal to the given key, 47 | * False otherwise. 48 | */ 49 | inline bool verify(const str key, const int ofs = 0) const { 50 | // Compare the remain part of the string with the given key 51 | return ustrcmp(key + ofs, (char *)(this->k + ofs)) == 0; 52 | } 53 | 54 | /** 55 | * Compare the given key with the key of the kv_load. 
56 | * 57 | * This function compares the given key with the key of the kv_load, and 58 | * returns an integer less than, equal to, or greater than zero if key is 59 | * found, respectively, to be less than, to match, or be greater than the 60 | * key of the kv_load. 61 | * 62 | * @param key The key to be compared with the key of the kv_load. 63 | * @param ofs The offset of the key to be compared (default: 0). 64 | * 65 | * @return An integer less than, equal to, or greater than zero if key is 66 | * found, respectively, to be less than, to match, or be greater 67 | * than the key of the kv_load. 68 | */ 69 | inline int keycmp(const str key, const int ofs = 0) const { 70 | return ustrcmp(key + ofs, (char *)(this->k + ofs)); 71 | } 72 | 73 | /** 74 | * Update the value of the kv_load. 75 | * 76 | * @param val The new value of the kv_load. 77 | */ 78 | inline void update(const uint64_t val) { this->v = val; } 79 | 80 | /** 81 | * Verify a part of the string. 82 | * 83 | * This function checks if the given part of the string is equal to the 84 | * part of the key in the kv_load. 85 | * 86 | * @param key The key to be verified. 87 | * @param begin The starting index of the part of the key to be verified 88 | * (inclusive). 89 | * @param end The ending index of the part of the key to be verified 90 | * (exclusive). 91 | * 92 | * @return True if the given part of the string is equal to the part of the 93 | * key in the kv_load, False otherwise. 94 | */ 95 | inline bool part_verify(const str key, const int begin, 96 | const int end) const { 97 | for (int i = begin; i < end; ++i) { 98 | if (key[i] != k[i]) { 99 | return false; 100 | } 101 | } 102 | return true; 103 | } 104 | 105 | /** 106 | * Get the size of the kv_load in bytes. 107 | * 108 | * This function returns the total size of the kv_load in bytes which 109 | * includes the size of the key, the value and the null terminator of the 110 | * key. 111 | * 112 | * @return The size of the kv_load in bytes. 
113 | */ 114 | inline size_t _len() const { 115 | size_t byte_sz = sizeof(uint64_t) + strlen(k) + 1; 116 | return byte_sz; 117 | } 118 | }; 119 | 120 | /** 121 | * Create a new kv_pair. 122 | * 123 | * This function allocates memory for a new kv_pair and sets the key and value 124 | * of the new kv_pair. It returns a pointer to the new kv_pair. 125 | * 126 | * @param k The key of the new kv_pair. 127 | * @param v The value of the new kv_pair. 128 | * 129 | * @return A pointer to the new kv_pair. 130 | */ 131 | inline kv *new_kv(const str k, const uint64_t v) { 132 | size_t sz = sizeof(kv) + ustrlen(k) + 1; // +1 for null terminator 133 | kv *kvload = (kv *)new uint8_t[sz]; // allocate memory 134 | kvload->set(k, v); // set key and value 135 | return kvload; // return pointer to new kv_pair 136 | } 137 | 138 | /** 139 | * Create a new kv_pair with a hash value embedded in the pointer. 140 | * 141 | * This function allocates memory for a new kv_pair, sets the key and value of 142 | * the new kv_pair and embeds the hash value of the key in the pointer to the 143 | * kv_pair. It returns a pointer to the new kv_pair. 144 | * 145 | * @param k The key of the new kv_pair. 146 | * @param v The value of the new kv_pair. 147 | * 148 | * @return A pointer to the new kv_pair with the hash value embedded in the 149 | * pointer. 150 | */ 151 | inline kv *new_hash_kv(const str k, const uint64_t v) { 152 | kv *ret = new_kv(k, v); 153 | uint64_t hash = hashStr(k); // Calculate the hash value of the key 154 | uint64_t ptr = (uint64_t)(void *)ret; // Get the pointer to the kv_pair 155 | ptr = ptr | (hash << 48); // Embed the hash value in the pointer 156 | return (kv *)(void *)ptr; // Return the pointer to the kv_pair with the hash 157 | // value embedded in it 158 | } 159 | 160 | /** 161 | * Create a new kv_pair with a hash value embedded in the pointer from an 162 | * existing kv_pair. 
163 | * 164 | * This function creates a new kv_pair with a hash value embedded in the pointer 165 | * to the kv_pair. The hash value is calculated from the key of the given 166 | * kv_pair and the pointer to the new kv_pair is returned. 167 | * 168 | * @param _kv The kv_pair to create a new kv_pair from. 169 | * 170 | * @return A pointer to the new kv_pair with the hash value embedded in the 171 | * pointer. 172 | */ 173 | inline kv *new_hash_kv(kv *_kv) { 174 | uint64_t hash = hashStr(_kv->k); // Calculate the hash value of the key 175 | uint64_t ptr = (uint64_t)(void *)_kv; // Get the pointer to the kv_pair 176 | ptr = ptr | (hash << 48); // Embed the hash value in the pointer 177 | return (kv *)(void *)ptr; // Return the pointer to the kv_pair with the hash 178 | // value embedded in it 179 | } 180 | 181 | /** 182 | * Extract the hash value from a pointer to a kv_pair with a hash value 183 | * embedded in it. 184 | * 185 | * This function extracts the hash value from the given pointer to a kv_pair 186 | * with a hash value embedded in it. The hash value is assumed to be in the 187 | * most significant 16 bits of the pointer value. 188 | * 189 | * @param ptr The pointer to the kv_pair with the hash value embedded in it. 190 | * 191 | * @return The hash value of the pointer. 192 | */ 193 | inline uint16_t getHashVal(const kv *ptr) { 194 | uint64_t v = (uint64_t)(void *)ptr; // Cast the pointer to an integer 195 | return (v >> 48) & 0xffff; // Extract the hash value and return it 196 | } 197 | 198 | /** 199 | * @brief Destroy a kv_pair. 200 | * 201 | * This function destroys a kv_pair allocated using new_kv. The kv_pair 202 | * pointer should not be used after this function has been called. 203 | * 204 | * @param kv_load The kv_pair to destroy. 
205 | */ 206 | void free_kv(kv *kv_load) { 207 | kv *raw_kv = (kv *)PTR_RAW(kv_load); // Get the pointer to the raw kv_pair 208 | delete[] reinterpret_cast( 209 | raw_kv); // Delete the raw kv_pair from memory 210 | } 211 | 212 | /** 213 | * Sub-Trie KV 214 | */ 215 | class ST_kv { 216 | public: 217 | kv *_kv; 218 | ST_kv() : _kv(NULL){}; 219 | ST_kv(kv *kv) : _kv(kv){}; 220 | ST_kv(const str _k, const uint64_t _v) { _kv = new_kv(_k, _v); } 221 | inline uint64_t read() const { return _kv->read(); } 222 | inline const char *getKey() const { return _kv->k; } 223 | inline kv *getKV() const { return _kv; } 224 | }; 225 | 226 | }; // namespace lits -------------------------------------------------------------------------------- /lits/hot_src/HOTSingleThreadedChildPointerInterface.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __HOT__SINGLE_THREADED__HOT_SINGLE_THREADED_CHILD_POINTER_INTERFACE__ 2 | #define __HOT__SINGLE_THREADED__HOT_SINGLE_THREADED_CHILD_POINTER_INTERFACE__ 3 | 4 | #include 5 | 6 | #include "include/DiscriminativeBit.hpp" 7 | #include "include/NodeType.hpp" 8 | 9 | namespace hot { 10 | namespace singlethreaded { 11 | 12 | struct HOTSingleThreadedNodeBase; 13 | 14 | class HOTSingleThreadedChildPointer { 15 | private: 16 | intptr_t mPointer; 17 | 18 | public: 19 | template 20 | static inline auto castToNode(HOTSingleThreadedNodeBase const* node); 21 | 22 | template 23 | static inline auto castToNode(HOTSingleThreadedNodeBase* node); 24 | 25 | /** 26 | * This method calls the passed operation with the actual node type 27 | * 28 | * Be aware to only call this method on child pointer representing valid 29 | * nodes. For all other cases this may result in undefined behaviour. 
30 | * 31 | * @tparam Operation the type of the operation to execute on the node, the 32 | * return type of the operation determines the return type of the node 33 | * @param withPrefetch determines whether the node should be prefetched 34 | * before invoking the operation 35 | * @param operation the type of the operation to execute on the node 36 | * @return the result of the operation invoked on the actual node 37 | */ 38 | template 39 | inline auto executeForSpecificNodeType(bool const withPrefetch, 40 | Operation const& operation) const; 41 | 42 | /** 43 | * This method calls the passed operation with the actual node type 44 | * 45 | * Be aware to only call this method on child pointer representing valid 46 | * nodes. For all other cases this may result in undefined behaviour. 47 | * 48 | * @tparam Operation the type of the operation to execute on the node, the 49 | * return type of the operation determines the return type of the node 50 | * @param withPrefetch determines whether the node should be prefetched 51 | * before invoking the operation 52 | * @param operation the type of the operation to execute on the node 53 | * @return the result of the operation invoked on the actual node 54 | */ 55 | template 56 | inline auto executeForSpecificNodeType(bool const withPrefetch, 57 | Operation const& operation); 58 | 59 | /** 60 | * initializes a default child pointer 61 | * a default child pointer is of type node and its value is a nullpointer to 62 | * a node. 
63 | */ 64 | inline HOTSingleThreadedChildPointer(); 65 | 66 | /** 67 | * creates a copy of the child pointer 68 | * @param rawPointer the child pointer to copy 69 | */ 70 | inline HOTSingleThreadedChildPointer( 71 | HOTSingleThreadedChildPointer const& rawPointer); 72 | 73 | /** 74 | * For a given node type and the base node pointer constructs a child 75 | * pointer which is able to deduce the actual node's type 76 | * 77 | * Be aware that specifying a non-matching node type may result in undefined 78 | * behaviour 79 | * 80 | * @param nodeAlgorithmType the actual type of the node to point to 81 | * @param node a pointer to the base node 82 | */ 83 | inline HOTSingleThreadedChildPointer( 84 | hot::commons::NodeType nodeAlgorithmType, 85 | HOTSingleThreadedNodeBase const* node); 86 | 87 | /** 88 | * This initializes a child pointer with the tuple identifier of an entry. 89 | * How this tuple identifier is formed heavily depends on the actual value's 90 | * type 91 | * 92 | * @param leafValue a leaf value represented by its tuple identifier 93 | */ 94 | inline HOTSingleThreadedChildPointer(intptr_t leafValue); 95 | 96 | inline HOTSingleThreadedChildPointer& 97 | operator=(const HOTSingleThreadedChildPointer& other); 98 | 99 | inline bool operator==(HOTSingleThreadedChildPointer const& rhs) const; 100 | 101 | inline bool operator!=(HOTSingleThreadedChildPointer const& rhs) const; 102 | 103 | inline void free() const; 104 | 105 | /** 106 | * Extracts the node type of this child pointer. 107 | * Be aware that this is only defined for actual nodes. In all other cases 108 | * the result is undefined. 109 | * 110 | * @return the type of the node pointed to by this child pointer. 111 | */ 112 | inline hot::commons::NodeType getNodeType() const; 113 | 114 | /** 115 | * Extracts the raw node pointer from this child pointer. 116 | * Be aware that this is only defined for actual nodes. In all other cases 117 | * the result is undefined. 
118 | * 119 | * @return a pointer to the node itself 120 | */ 121 | inline HOTSingleThreadedNodeBase* getNode() const; 122 | 123 | /** 124 | * Extracts the tuple identifier of this child pointer. 125 | * Be aware that this is only defined if this child pointer was initialized 126 | * as a leaf child pointer. In all other cases the result is undefined. 127 | * 128 | * @return the stored tuple identifier 129 | */ 130 | inline intptr_t getTid() const; 131 | 132 | /** 133 | * @return whether this child pointer instance points to a leaf value or a 134 | * child node. 135 | */ 136 | inline bool isLeaf() const; 137 | 138 | /** 139 | * @return whether this child pointer points to an actual node instance. It 140 | * is therefore guaranteed that it is no nullptr. 141 | */ 142 | inline bool isAValidNode() const; 143 | 144 | /** 145 | * 146 | * @return whether this child pointer is no leaf value. It is therefore true 147 | * for pointers to actual nodes as well as for nullpointers. 148 | */ 149 | inline bool isNode() const; 150 | 151 | /** 152 | * @return whether this child pointer is of type node and was initialized 153 | * with a nullpr. 154 | */ 155 | inline bool isUnused() const; 156 | 157 | inline uint16_t getHeight() const; 158 | 159 | template inline auto search(Args... args) const; 160 | 161 | /** 162 | * Determines the number of entries in the node represented by this child 163 | * pointer instance. 164 | * 165 | * Be aware that this is only defined for childpointers of type node which 166 | * have been initialized with pointers other than nullptr. 167 | * 168 | * @return the number of entries in the node in 169 | */ 170 | inline unsigned int getNumberEntries() const; 171 | 172 | /** 173 | * Collects all discriminative bits used in the binary trie, this pointers 174 | * instance references to. 
175 | * 176 | * @return the set of discriminative bits used in the underlying binary trie 177 | */ 178 | inline std::set getDiscriminativeBits() const; 179 | 180 | /** 181 | * Determines the smallest value rooted in the subtree corresponding to this 182 | * childpointer. If this pointer represents a leaf value it is the leaf 183 | * value itself. Otherwise it is the smallest value of the subtree rooted at 184 | * the node's first child pointer. 185 | * 186 | * Be aware that this function is not defined for childpointers of type 187 | * node, which have been initialized with a nullptr. 188 | * 189 | * @return the smallest value in the subtree corresponding to this 190 | * childpointer. 191 | */ 192 | inline HOTSingleThreadedChildPointer getSmallestLeafValueInSubtree() const; 193 | 194 | /** 195 | * Determines the largest value rooted in the subtree corresponding to this 196 | * childpointer. If this pointer represents a leaf value it is the leaf 197 | * value itself. Otherwise it is the largest value of the subtree rooted at 198 | * the node's last child pointer. 199 | * 200 | * Be aware that this function is not defined for childpointers of type 201 | * node, which have been initialized with a nullptr. 202 | * 203 | * @return the largest value in the subtree corresponding to this 204 | * childpointer. 
205 | */ 206 | inline HOTSingleThreadedChildPointer getLargestLeafValueInSubtree() const; 207 | 208 | /** 209 | * deletes all nodes contained in this subtree, including this node itself 210 | */ 211 | inline void deleteSubtree(); 212 | }; 213 | 214 | } // namespace singlethreaded 215 | } // namespace hot 216 | 217 | #endif -------------------------------------------------------------------------------- /lits/lits.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "lits_cnode.hpp" 4 | #include "lits_hot.hpp" 5 | #include "lits_iter.hpp" 6 | #include "lits_kv.hpp" 7 | #include "lits_model.hpp" 8 | #include "lits_node.hpp" 9 | 10 | #include 11 | #include 12 | 13 | namespace lits { 14 | 15 | class LITS { 16 | private: 17 | // For bulk load, the index needs at least 1000 strings to train the model 18 | static const int min_bulk_load_size = 1000; 19 | 20 | // Whether the index has been bulk loaded 21 | bool hasBeenBuild = false; 22 | 23 | // The Global String Model: Hash-enhanced Prefix Table 24 | HPT *hpt; 25 | 26 | // The Structural Decision Tree 27 | PMSS *pmss; 28 | 29 | // The root node of the index. 
30 | Item root; 31 | 32 | public: 33 | LITS() = default; 34 | ~LITS() = default; 35 | 36 | bool bulkload(const char **_keys, const uint64_t *_vals, const int _len, 37 | HPT *_hpt = NULL) { 38 | RT_ASSERT(hasBeenBuild == false); 39 | return _bulkload((const str *)_keys, _vals, _len, _hpt); 40 | } 41 | 42 | void destroy() { 43 | RT_ASSERT(hasBeenBuild); 44 | return _destroy(); 45 | } 46 | 47 | kv *lookup(const char *_key) { 48 | RT_ASSERT(hasBeenBuild); 49 | return _lookup((const str)_key); 50 | } 51 | 52 | bool insert(const char *_key, const uint64_t _val) { 53 | RT_ASSERT(hasBeenBuild); 54 | return _insert((const str)_key, (const val)_val); 55 | } 56 | 57 | /** 58 | * If update, return the kv_entry's old value 59 | * If insert, return 0 60 | */ 61 | val upsert(const char *_key, const uint64_t _val) { 62 | RT_ASSERT(hasBeenBuild); 63 | return _upsert((const str)_key, (const val)_val); 64 | } 65 | 66 | bool remove(const char *_key) { 67 | RT_ASSERT(hasBeenBuild); 68 | return _remove((const str)_key); 69 | } 70 | 71 | litsIter find(const char *_key) const { 72 | RT_ASSERT(hasBeenBuild); 73 | return _find((const str)_key); 74 | } 75 | 76 | litsIter begin() const { 77 | RT_ASSERT(hasBeenBuild); 78 | return _begin(); 79 | } 80 | 81 | private: 82 | bool _bulkload(const str *_keys, const uint64_t *_vals, const int _len, 83 | HPT *_hpt = NULL) { 84 | // Check the input is sorted and unique 85 | if (_len < min_bulk_load_size) { 86 | std::cerr << "[Bulk Load]: For bulk load, the index needs at least " 87 | << min_bulk_load_size << " strings!" << std::endl; 88 | return false; 89 | } 90 | 91 | for (int i = 1; i < _len; ++i) { 92 | if (ustrcmp(_keys[i], _keys[i - 1]) < 0) { 93 | std::cerr << "[Bulk Load]: The input strings are not sorted!" 94 | << std::endl; 95 | return false; 96 | } 97 | if (ustrcmp(_keys[i], _keys[i - 1]) == 0) { 98 | std::cerr << "[Bulk Load]: The input strings are not unique!" 
99 | << std::endl; 100 | return false; 101 | } 102 | } 103 | 104 | // Train the Hash-enhanced Prefix Table 105 | if (_hpt) { 106 | hpt = _hpt; 107 | } else { 108 | hpt = new HPT(); 109 | hpt->train(_keys, _len); 110 | } 111 | 112 | // Init the Performance Model for Structure Selection 113 | pmss = new PMSS(); 114 | 115 | // Bulk load the root 116 | KVS2 kvs = {(const str *)_keys, (const val *)_vals}; 117 | 118 | root = pmss_bulk(kvs, 0, _len, 0, hpt, pmss); 119 | 120 | hasBeenBuild = true; 121 | return true; 122 | } 123 | 124 | void _destroy() { 125 | delete hpt; 126 | delete pmss; 127 | 128 | KVS1 kvs; 129 | 130 | // Delete the main structure of the index 131 | root.recursive_extract(kvs); 132 | 133 | // Delete the key-value entry of the index 134 | kvs.self_delete(); 135 | } 136 | 137 | kv *_lookup(const str _key) { 138 | int ccpl = 0; 139 | Item item = root; 140 | 141 | while (1) { 142 | switch (item.get_itype()) { 143 | case ITYP_Trie: { 144 | return trie_search(item, _key); 145 | }; 146 | case ITYP_Sing: { 147 | return sing_search(item, _key, ccpl); 148 | }; 149 | case ITYP_CNod: { 150 | return cnod_search(item, _key); 151 | } 152 | case ITYP_Null: { 153 | return NULL; 154 | }; 155 | } 156 | 157 | // Recursively locate the position 158 | item = *item.locate(_key, ccpl, hpt); 159 | } 160 | 161 | return NULL; 162 | } 163 | 164 | bool _insert(const str _key, const val _val) { 165 | int ccpl = 0; 166 | Item *item = &root; 167 | PathStack stack(hpt, pmss); 168 | bool result; 169 | 170 | while (1) { 171 | switch (item->get_itype()) { 172 | case ITYP_Trie: { 173 | result = trie_insert(*item, _key, _val); 174 | goto RET; 175 | } 176 | case ITYP_Sing: { 177 | result = sing_insert(*item, _key, _val, ccpl); 178 | goto RET; 179 | } 180 | case ITYP_CNod: { 181 | result = cnod_insert(*item, _key, _val, hpt, pmss); 182 | goto RET; 183 | } 184 | case ITYP_Null: { 185 | item->set_entry(new_kv(_key, _val)); 186 | result = true; 187 | goto RET; 188 | } 189 | } 190 | 191 | // 
Record the path 192 | stack.record_path(item, ccpl); 193 | 194 | // Recursively locate the position 195 | item = item->locate(_key, ccpl, hpt); 196 | } 197 | 198 | RET: 199 | 200 | if (result == true) { 201 | stack.change_num(1); 202 | } 203 | 204 | return result; 205 | } 206 | 207 | bool _remove(const str _key) { 208 | int ccpl = 0; 209 | Item *item = &root; 210 | PathStack stack(hpt, pmss); 211 | bool result; 212 | 213 | while (1) { 214 | switch (item->get_itype()) { 215 | case ITYP_Trie: { 216 | result = trie_remove(*item, _key); 217 | goto RET; 218 | } 219 | case ITYP_Sing: { 220 | result = sing_remove(*item, _key, ccpl); 221 | goto RET; 222 | } 223 | case ITYP_CNod: { 224 | result = cnod_remove(*item, _key, hpt, pmss); 225 | goto RET; 226 | } 227 | case ITYP_Null: { 228 | result = false; 229 | goto RET; 230 | } 231 | } 232 | 233 | // Record the path 234 | stack.record_path(item, ccpl); 235 | 236 | // Recursively locate the position 237 | item = item->locate(_key, ccpl, hpt); 238 | } 239 | 240 | RET: 241 | 242 | if (result == true) { 243 | stack.change_num(-1); 244 | } 245 | 246 | return result; 247 | } 248 | 249 | val _upsert(const str _key, const val _val) { 250 | int ccpl = 0; 251 | Item *item = &root; 252 | PathStack stack(hpt, pmss); 253 | val result; 254 | 255 | while (1) { 256 | switch (item->get_itype()) { 257 | case ITYP_Trie: { 258 | result = trie_upsert(*item, _key, _val); 259 | goto RET; 260 | } 261 | case ITYP_Sing: { 262 | result = sing_upsert(*item, _key, _val, ccpl); 263 | goto RET; 264 | } 265 | case ITYP_CNod: { 266 | result = cnod_upsert(*item, _key, _val, hpt, pmss); 267 | goto RET; 268 | } 269 | case ITYP_Null: { 270 | item->set_entry(new_kv(_key, _val)); 271 | result = 0; 272 | goto RET; 273 | } 274 | } 275 | 276 | // Record the path 277 | stack.record_path(item, ccpl); 278 | 279 | // Recursively locate the position 280 | item = item->locate(_key, ccpl, hpt); 281 | } 282 | 283 | RET: 284 | 285 | if (result == 0) { 286 | 
stack.change_num(1); 287 | } 288 | 289 | return result; 290 | } 291 | 292 | litsIter _find(const str _key) const { 293 | int pos, ccpl = 0; 294 | Item item = root; 295 | 296 | litsIter iter; 297 | 298 | while (1) { 299 | switch (item.get_itype()) { 300 | case ITYP_Trie: { 301 | trie_find(item, _key, iter); 302 | return iter; 303 | }; 304 | case ITYP_Sing: { 305 | sing_find(item, iter); 306 | return iter; 307 | }; 308 | case ITYP_CNod: { 309 | cnod_find(item, _key, iter); 310 | return iter; 311 | } 312 | case ITYP_Null: { 313 | iter.set_invalid(); 314 | return iter; 315 | }; 316 | } 317 | 318 | // Recursively locate the position (and record the path) 319 | item = *recordPath_find(item, _key, ccpl, hpt, iter); 320 | } 321 | 322 | iter.set_invalid(); 323 | return iter; 324 | } 325 | 326 | litsIter _begin() const { 327 | litsIter iter; 328 | iter.FIRST(root); 329 | return iter; 330 | } 331 | }; 332 | }; // namespace lits 333 | -------------------------------------------------------------------------------- /lits/hot_src/include/SingleMaskPartialKeyMappingInterface.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __HOT__COMMONS__SINGLE_MASK_PARTIAL_KEY_MAPPING_INTERFACE_HPP___ 2 | #define __HOT__COMMONS__SINGLE_MASK_PARTIAL_KEY_MAPPING_INTERFACE_HPP___ 3 | 4 | 5 | #include "PartialKeyMappingBase.hpp" 6 | 7 | namespace hot { namespace commons { 8 | 9 | template struct MultiMaskPartialKeyMapping; 10 | 11 | /** 12 | * A partial key mapping which by using an offset and a 64 bit mask is able to extract partial keys consisting of discriminative bits contained in successive 64 bits. 
*
 */
// NOTE(review): several template parameter lists in this declaration
// (e.g. before getPrefixBitsMask, insert, extract) and the arguments of
// std::array / std::set appear to have been lost when this dump was
// produced - confirm against the upstream header before compiling.
class alignas(8) SingleMaskPartialKeyMapping : public PartialKeyMappingBase {
public:
	static constexpr uint MINIMUM_EXTRACTION_BIT_COUNT_SUPPORTED = 8;
private:
	uint32_t mOffsetInBytes;
	uint64_t mSuccessiveExtractionMask;

public:
	inline SingleMaskPartialKeyMapping(SingleMaskPartialKeyMapping const &src);

	/**
	 * Creates a single mask partial key mapping for a single discriminative bit.
	 * The resulting partial keys consist of only a single bit and can therefore only be used to discriminate 2 entries.
	 *
	 * @param discriminativeBit the discriminative bit to create an extraction mask for
	 */
	inline SingleMaskPartialKeyMapping(DiscriminativeBit const &discriminativeBit);

	/**
	 * Creates a single mask partial key mapping from a set of byte positions and corresponding byte masks.
	 * Be aware that all byte positions must lie within an 8 byte range
	 *
	 * @param extractionBytePositions the byte positions of the corresponding byte masks
	 * @param extractionByteData the byte masks describing the discriminative bits used in each of the bytes specified previously by the extractionBytePositions
	 * @param extractionBytesUsedMask if not all of the above byte position/mask pairs should be considered, this mask can be used to determine which of those pairs should actually be used.
	 * @param mostSignificantBitIndex
	 * @param leastSignificantBitIndex
	 */
	inline SingleMaskPartialKeyMapping(
		uint8_t const* extractionBytePositions,
		uint8_t const * extractionByteData,
		uint32_t const extractionBytesUsedMask,
		uint16_t const mostSignificantBitIndex,
		uint16_t const leastSignificantBitIndex
	);

	/**
	 * Creates a new single mask partial key mapping by adding a new discriminating bit.
	 * Be aware that this is only possible if all existing discriminative bits and the new discriminating bit lie within an 8 byte range
	 *
	 * @param existing the existing single mask partial key to add the new discriminating bit to.
	 * @param discriminatingBit the new discriminating bit to add
	 */
	inline SingleMaskPartialKeyMapping(SingleMaskPartialKeyMapping const &existing, DiscriminativeBit const &discriminatingBit);

	/**
	 * Creates a new single mask partial key mapping with only a subset of the original discriminating bits used
	 *
	 * @param existing the existing single mask mapping
	 * @param maskBitsNeeded a partial key which has only those bits set which should be represented in the new single mask mapping.
	 */
	inline SingleMaskPartialKeyMapping(SingleMaskPartialKeyMapping const &existing, uint32_t const &maskBitsNeeded);

	/**
	 *
	 * @return the number of discriminative bits represented by this single mask partial key mapping.
	 */
	inline uint16_t calculateNumberBitsUsed() const;

	/**
	 * Given a key information it generates a prefix up to the position represented by the key information and generates a mask corresponding to the bits defined in this extraction mask
	 *
	 * @param significantKeyInformation a description of the position to create a prefix of (the position is exclusive). e.g. if the position is 3 the prefix is 11
	 * @return the mask containing the bits corresponding to the prefix described by the key information
	 */
	template inline PartialKeyType
	getPrefixBitsMask(DiscriminativeBit const &significantKeyInformation) const;

	/**
	 * Inserts a new discriminating bit into this single mask mapping.
	 * The resulting new mask is passed to the provided operation
	 *
	 * @tparam Operation a callback which is able to handle the new partial key mapping (can be either of single or multi mask type)
	 * @param discriminativeBit
	 * @param operation the operation which processes the new discriminative bits mapping
	 * @return the result of the provided operation
	 */
	template
	inline auto insert(DiscriminativeBit const &discriminativeBit, Operation const &operation) const;

	/**
	 * Extracts only a subset of the discriminative bits.
	 * The resulting new partial key mapping is passed to the provided operation
	 *
	 * @tparam Operation a callback which is able to handle the new partial key mapping (can only be of type single mask)
	 * @param bitsUsed a partial key having only those bits set which should be part of the new partial key mapping
	 * @param operation the operation which processes the new discriminative bits mapping
	 * @return the result of the provided operation
	 */
	template
	inline auto extract(uint32_t bitsUsed, Operation const &operation) const;

	/**
	 * A helper function which invokes a callback with this partial key mapping itself and the smallest possible partial key type which is necessary to represent partial keys
	 * constructed by this partial key mapping.
	 *
	 * e.g.:
	 * + for less than or equal to 8 discriminative bits => uint8_t
	 * + for less than or equal to 16 discriminative bits => uint16_t
	 * + for less than or equal to 32 discriminative bits => uint32_t
	 *
	 *
	 * @tparam Operation the type of the callback to invoke
	 * @param operation the callback
	 * @return the result of the operation
	 */
	template
	inline auto executeWithCorrectMaskAndDiscriminativeBitsRepresentation(Operation const &operation) const;

	/**
	 *
	 * @return whether all 8 bytes of the underlying mask are used
	 *
	 * NOTE(review): the name suggests the opposite polarity (true when some
	 * bytes are unused); wording kept as found - confirm against the
	 * implementation.
	 */
	inline bool hasUnusedBytes() const;

	/**
	 *
	 * @return a mask of all the bytes used in the underlying mask
	 */
	inline uint32_t getUsedBytesMask() const;

	/**
	 * @return the internal offset of the stored underlying mask
	 */
	inline uint32_t getByteOffset() const;

	inline uint8_t getExtractionByte(unsigned int byteIndex) const;
	inline uint8_t getExtractionBytePosition(unsigned int byteIndex) const;

	/**
	 *
	 * @return a partial key with only the highest bit set
	 */
	inline uint32_t getMaskForHighestBit() const;

	/**
	 *
	 * @param discriminativeBit the only discriminative bit to extract
	 * @return a partial key with only this discriminative bit set
	 */
	inline uint32_t getMaskFor(DiscriminativeBit const &discriminativeBit) const;

	/**
	 *
	 * @return a mask with all mask bits set. This results in a mask like 00111111 where the number of 1s is equal to the number of key bits
	 */
	inline uint32_t getAllMaskBits() const;

	inline uint32_t extractMask(uint8_t const *keyBytes) const;

	/**
	 *
	 * @return a key which has only those discriminative bits set which are represented by this partial key mapping
	 */
	inline std::array createIntermediateKeyWithOnlySignificantBitsSet() const;

private:
	// delegating constructor
	inline SingleMaskPartialKeyMapping(SingleMaskPartialKeyMapping const &existing, uint64_t const newExtractionMaskWithSameOffset);

	inline uint32_t extractMaskFromSuccessiveBytes(uint64_t const inputMask) const;

	inline __m64 getRegister() const;

	inline uint64_t getSuccessiveMaskForBit(uint const bytePosition, uint const byteRelativeBitPosition) const;

	static inline uint convertToIndexOfOtherEndiness(uint const maskRelativeBytePosition, uint const byteRelativeBitPosition);

	inline uint64_t getSuccessiveMaskForAbsoluteBitPosition(uint absoluteBitPosition) const;

	inline uint64_t getSuccessiveMaskForMask(uint32_t const mask) const;

	static inline uint getSuccesiveByteOffsetForLeastSignificantBitIndex(uint leastSignificantBitIndex);

	static inline uint16_t calculateRelativeMostSignificantBitIndex(uint64_t rawExtractionMask);

	static inline uint16_t calculateRelativeLeastSignificantBitIndex(uint64_t rawExtractionMask);

	static inline uint64_t getSuccessiveExtractionMaskFromRandomBytes(
		uint8_t const * extractionBytePositions,
		uint8_t const * extractionByteData,
		uint32_t extractionBytesUsedMask,
		uint32_t const offsetInBytes
	);

public:
	/**
	 * Gets a partial key with only the most significant bit set; this function is solely needed for debugging purposes.
	 * Due to little/big endian differences the extraction information must be considered.
	 *
	 * e.g. bit for position 0 might be the 7th bit of the mask.
	 * With less significant bits stored in the bits 8 till 31
	 *
	 * @param mask the mask to extract the most significant bit from
	 * @return the most significant bit of the given mask.
	 */
	template inline PartialKeyType getMostSignifikantMaskBit(PartialKeyType mask) const;

	/**
	 *
	 * @param partialKey
	 * @return the least significant bit set in the given partial key
	 */
	inline uint16_t getLeastSignificantBitIndex(uint32_t partialKey) const;

	/**
	 *
	 * @return a set of all the discriminative bit positions represented by this partial key mapping
	 */
	inline std::set getDiscriminativeBits() const;
};

} }

#endif

--------------------------------------------------------------------------------
/lits/lits_gpkl.hpp:
--------------------------------------------------------------------------------
#pragma once

#include "lits_base.hpp"

namespace lits {

/**
 * Returns the length of a null-terminated string.
 *
 * @param s a null-terminated string
 *
 * @return the length of the string, excluding the terminating null character
 */
inline int ustrlen(const str s) {
    /* Count characters until the terminating null character is reached. */
    int i = 0;
    for (; s[i]; ++i) {
        // nothing to do: the loop header does the counting
    }
    return i;
}

/**
 * Return the common prefix length of two strings.
 *
 * Find the length of the common prefix of two null-terminated strings s1 and
 * s2.
29 | * 30 | * @param s1 a null-terminated string 31 | * @param s2 a null-terminated string 32 | * 33 | * @return the length of the common prefix of s1 and s2 34 | */ 35 | inline int ucpl(const str s1, const str s2) { 36 | int i = 0; 37 | /* Find the first character that differs between s1 and s2, or the 38 | null character if the strings are identical up to that point. */ 39 | for (; s1[i] && s2[i] && s1[i] == s2[i]; ++i) 40 | ; /* do nothing, just iterate */ 41 | return i; 42 | } 43 | 44 | /** 45 | * Return the common prefix length of two std::strings. 46 | * 47 | * Find the length of the common prefix of two null-terminated strings s1 and 48 | * s2. The null-terminated C-style strings are obtained by calling c_str() on 49 | * the std::string objects. 50 | * 51 | * @param s1 a std::string 52 | * @param s2 a std::string 53 | * 54 | * @return the length of the common prefix of s1 and s2 55 | */ 56 | inline int ucpl(const std::string &s1, const std::string &s2) { 57 | // Get the null-terminated C-style string representations of s1 and s2 58 | const char *cs1 = s1.c_str(); 59 | const char *cs2 = s2.c_str(); 60 | return ucpl(cs1, cs2); 61 | } 62 | 63 | /** 64 | * Return the distinguishing prefix length of string s1 and s2. 65 | * 66 | * This function returns the length of the common prefix of s1 and s2 plus 1. 67 | * If the strings have a common prefix of length n, then s1 and s2 differ in 68 | * their n+1-th character. 69 | */ 70 | inline int udpl(const str s1, const str s2) { return ucpl(s1, s2) + 1; } 71 | 72 | /** 73 | * Return the distinguishing prefix length of two std::strings. 74 | * 75 | * @param s1, s2 input strings 76 | * 77 | * @return the length of the common prefix plus 1 78 | */ 79 | inline int udpl(const std::string &s1, const std::string &s2) { 80 | return ucpl(s1, s2) + 1; 81 | } 82 | 83 | /** 84 | * Return the longest distinguishing prefix length between s1, s2 and s3. 
85 | * 86 | * This function returns the longest of the two distinguishing prefix lengths 87 | * between s1 and s2, and between s2 and s3. The strings must be given in 88 | * non-decreasing order, i.e. s1 <= s2 <= s3. 89 | * 90 | * @param s1 the first string 91 | * @param s2 the second string 92 | * @param s3 the third string 93 | * 94 | * @return the longest distinguishing prefix length between s1, s2 and s3 95 | */ 96 | inline int udpl(const str s1, const str s2, const str s3) { 97 | /* Find the longest of the two distinguishing prefix lengths between 98 | s1 and s2, and between s2 and s3. */ 99 | return std::max(udpl(s1, s2), udpl(s2, s3)); 100 | } 101 | 102 | /** 103 | * Return the longest distinguishing prefix length between s1, s2, and s3. 104 | * 105 | * This function returns the longest of the two distinguishing prefix lengths 106 | * between s1 and s2, and between s2 and s3. The strings must be given in 107 | * non-decreasing order, i.e. s1 <= s2 <= s3. 108 | * 109 | * @param s1 the first string 110 | * @param s2 the second string 111 | * @param s3 the third string 112 | * 113 | * @return the longest distinguishing prefix length between s1, s2, and s3 114 | */ 115 | inline int udpl(const std::string &s1, const std::string &s2, 116 | const std::string &s3) { 117 | // Find the longest of the two distinguishing prefix lengths between 118 | // s1 and s2, and between s2 and s3. 119 | return std::max(udpl(s1, s2), udpl(s2, s3)); 120 | } 121 | 122 | /** 123 | * Calculate the local group partial key length (LPKL) of a group. 124 | * 125 | * The LPKL is a measure of how much information each element in the group 126 | * provides to the group's key. A higher LPKL means that each element in the 127 | * group provides more information to the group's key. 
128 | * 129 | * The LPKL is calculated as the average length of the distinguishing prefixes 130 | * between each element in the group and the previous element, plus the 131 | * following element, minus the group common prefix length. The group common 132 | * prefix length is the length of the common prefix of all elements in the 133 | * group. 134 | * 135 | * @param keys the group keys, in non-decreasing order 136 | * @param len the number of keys in the group 137 | * 138 | * @return the local group partial key length of the group 139 | */ 140 | inline double lpkl(const str *keys, const int len) { 141 | // Group Common Prefix Length 142 | double gcpl = ucpl(keys[0], keys[len - 1]); 143 | double dkl_sum = 0; 144 | 145 | // Calculate the average length of the distinguishing prefixes between each 146 | // element in the group and its neighbors. 147 | for (int i = 0; i < len; ++i) { 148 | if (i == 0) 149 | dkl_sum += udpl(keys[0], keys[1]); 150 | else if (i == len - 1) 151 | dkl_sum += udpl(keys[len - 2], keys[len - 1]); 152 | else 153 | dkl_sum += udpl(keys[i - 1], keys[i], keys[i + 1]); 154 | } 155 | 156 | double avg_dkl = dkl_sum / len; 157 | return avg_dkl - gcpl; 158 | } 159 | 160 | /** 161 | * Compute the local partial key length of a group of strings. 162 | * 163 | * @param keys a vector of std::strings representing the group 164 | * 165 | * @return the local partial key length of the group 166 | */ 167 | inline double lpkl(const std::vector &keys) { 168 | // Local Common Prefix Length 169 | const int len = keys.size(); // Length of the group 170 | double lcpl = ucpl(keys[0], keys[len - 1]); // Local Common Prefix Length 171 | double dkl_sum = 0; // Sum of the Distinguishing Prefix Lengths 172 | 173 | // Calculate the average length of the distinguishing prefixes between each 174 | // element in the group and its neighbors. 
175 | for (int i = 0; i < len; ++i) { 176 | if (i == 0) 177 | dkl_sum += udpl( 178 | keys[0], keys[1]); // Prefix length of first and second elements 179 | else if (i == len - 1) 180 | dkl_sum += udpl( 181 | keys[len - 2], 182 | keys[len - 1]); // Prefix length of last and last-1 elements 183 | else 184 | dkl_sum += udpl( 185 | keys[i - 1], keys[i], 186 | keys[i + 1]); // Prefix length of ith element and its neighbors 187 | } 188 | 189 | double avg_dkl = dkl_sum / len; // Average Distinguishing Prefix Length 190 | return avg_dkl - lcpl; // Local Partial Key Length 191 | } 192 | 193 | template 194 | double getGPKL(const records &kvs, const int l, const int r) { 195 | const int len = r - l; // Length of the group 196 | double lcpl = ucpl(kvs[l].k, kvs[r - 1].k); // Local Common Prefix Length 197 | double dkl_sum = 0; // Sum of the Distinguishing Prefix Lengths 198 | 199 | // Calculate the average length of the distinguishing prefixes between each 200 | // element in the group and its neighbors. 201 | for (int i = l; i < r; ++i) { 202 | if (i == l) 203 | dkl_sum += udpl( 204 | kvs[l].k, 205 | kvs[l + 1].k); // Prefix length of first and second elements 206 | else if (i == r - 1) 207 | dkl_sum += 208 | udpl(kvs[r - 2].k, 209 | kvs[r - 1].k); // Prefix length of last and last-1 elements 210 | else 211 | dkl_sum += udpl( 212 | kvs[i - 1].k, kvs[i].k, 213 | kvs[i + 1].k); // Prefix length of ith element and its neighbors 214 | } 215 | 216 | double avg_dkl = dkl_sum / len; // Average Distinguishing Prefix Length 217 | return avg_dkl - lcpl; // Local Partial Key Length 218 | } 219 | 220 | /** 221 | * Compares two null-terminated strings and returns their ordering. 
222 | * 223 | * @param s1 a null-terminated string 224 | * @param s2 a null-terminated string 225 | * 226 | * @return 1 if s1 > s2, -1 if s1 < s2, 0 if s1 == s2 227 | */ 228 | inline int ustrcmp(const str s1, const str s2) { 229 | /* Iterate over both strings until we reach a character that differs 230 | * between the two strings, or until we reach the null character in both 231 | * strings. */ 232 | int i = 0; 233 | for (; s1[i] && s2[i] && s1[i] == s2[i]; ++i) 234 | ; 235 | 236 | /* If the characters at the current position in both strings are equal, 237 | * then the strings are equal. */ 238 | if (s1[i] == s2[i]) 239 | return 0; // s1 == s2 240 | 241 | /* Otherwise, return the ordering of the characters at the current position 242 | * in both strings. */ 243 | return s1[i] > s2[i] ? 1 : -1; 244 | } 245 | 246 | /** 247 | * Compare the given number of characters of two null-terminated strings and 248 | * return their ordering. 249 | * 250 | * @param s1 a null-terminated string 251 | * @param s2 a null-terminated string 252 | * @param len the number of characters to compare 253 | * 254 | * @return 1 if s1 > s2, -1 if s1 < s2, 0 if s1 == s2 255 | */ 256 | inline int ustrcmp(const str s1, const str s2, const int len) { 257 | /* Iterate over the given number of characters of both strings and compare 258 | * each character. If we encounter a character that differs between the two 259 | * strings, return the ordering of those characters. If all characters are 260 | * equal, then the strings are equal. */ 261 | for (int i = 0; i < len; ++i) { 262 | if (s1[i] != s2[i]) { 263 | return s1[i] > s2[i] ? 
1 : -1;
        }
    }
    return 0; // s1 == s2
}

}; // namespace lits

--------------------------------------------------------------------------------
/lits/hot_src/HOTSingleThreadedChildPointer.hpp:
--------------------------------------------------------------------------------
#ifndef __HOT__SINGLE_THREADED__HOT_SINGLE_THREADED_CHILD_POINTER__
#define __HOT__SINGLE_THREADED__HOT_SINGLE_THREADED_CHILD_POINTER__

#include "include/NodeParametersMapping.hpp"
#include "include/NodeType.hpp"

#include "HOTSingleThreadedChildPointerInterface.hpp"
#include "HOTSingleThreadedNode.hpp"

// NOTE(review): template parameter lists and the type arguments of the
// reinterpret_cast / static_cast / castToNode calls in this file appear to
// have been lost when this dump was produced - confirm against the upstream
// header before compiling.

namespace hot { namespace singlethreaded {

// Layout of mPointer as used by the functions below:
//   bit 0     - leaf flag (set by the leaf constructor, tested by isLeaf)
//   bits 1..3 - node type code (written by the node constructor, read by getNodeType)
//   the node address itself is stored shifted left by one bit.
constexpr intptr_t NODE_ALGORITHM_TYPE_EXTRACTION_MASK = 0x7u;
constexpr intptr_t POINTER_AND_IS_LEAF_VALUE_MASK = 15u;
constexpr intptr_t POINTER_EXTRACTION_MASK = ~(POINTER_AND_IS_LEAF_VALUE_MASK);

// Reinterprets a generic node base pointer as the concrete node type selected
// through hot::commons::NodeTypeToNodeParameters (const overload).
template inline auto HOTSingleThreadedChildPointer::castToNode(HOTSingleThreadedNodeBase const * node) {
	using DiscriminativeBitsRepresentationType = typename hot::commons::NodeTypeToNodeParameters::PartialKeyMappingType;
	using PartialKeyType = typename hot::commons::NodeTypeToNodeParameters::PartialKeyType;
	return reinterpret_cast const *>(node);
}

// Non-const overload of castToNode.
template inline auto HOTSingleThreadedChildPointer::castToNode(HOTSingleThreadedNodeBase * node) {
	using DiscriminativeBitsRepresentationType = typename hot::commons::NodeTypeToNodeParameters::PartialKeyMappingType;
	using PartialKeyType = typename hot::commons::NodeTypeToNodeParameters::PartialKeyType;
	return reinterpret_cast *>(node);
}

// Invokes `operation` with the referenced node cast to its concrete type,
// chosen by a switch over getNodeType(). With `withPrefetch` set, prefetches
// are issued for the node address and the addresses 64, 128 and 192 past it
// before dispatching (const overload).
template inline __attribute__((always_inline)) auto HOTSingleThreadedChildPointer::executeForSpecificNodeType(bool const withPrefetch, Operation const & operation) const {
	HOTSingleThreadedNodeBase const * node = getNode();

	// auto node_type = getNodeType();
	// printf ("Node Type is %s\n", nodeAlgorithmToString(node_type).c_str());

	if(withPrefetch) {
		__builtin_prefetch(node);
		__builtin_prefetch(reinterpret_cast(node) + 64);
		__builtin_prefetch(reinterpret_cast(node) + 128);
		__builtin_prefetch(reinterpret_cast(node) + 192);
	}

	switch(getNodeType()) {
		case hot::commons::NodeType ::SINGLE_MASK_8_BIT_PARTIAL_KEYS:
			return operation(*castToNode(node));
		case hot::commons::NodeType ::SINGLE_MASK_16_BIT_PARTIAL_KEYS:
			return operation(*castToNode(node));
		case hot::commons::NodeType ::SINGLE_MASK_32_BIT_PARTIAL_KEYS:
			return operation(*castToNode(node));
		case hot::commons::NodeType ::MULTI_MASK_8_BYTES_AND_8_BIT_PARTIAL_KEYS:
			return operation(*castToNode(node));
		case hot::commons::NodeType ::MULTI_MASK_8_BYTES_AND_16_BIT_PARTIAL_KEYS:
			return operation(*castToNode(node));
		case hot::commons::NodeType ::MULTI_MASK_8_BYTES_AND_32_BIT_PARTIAL_KEYS:
			return operation(*castToNode(node));
		case hot::commons::NodeType ::MULTI_MASK_16_BYTES_AND_16_BIT_PARTIAL_KEYS:
			return operation(*castToNode(node));
		default: //hot::commons::NodeType ::MULTI_MASK_32_BYTES_AND_32_BIT_PARTIAL_KEYS:
			return operation(*castToNode(node));
	}
}

// Non-const overload of executeForSpecificNodeType; same dispatch, but the
// operation receives a mutable node.
template inline __attribute__((always_inline)) auto HOTSingleThreadedChildPointer::executeForSpecificNodeType(bool const withPrefetch, Operation const & operation) {
	HOTSingleThreadedNodeBase * node = getNode();

	if(withPrefetch) {
		__builtin_prefetch(node);
		__builtin_prefetch(reinterpret_cast(node) + 64);
		__builtin_prefetch(reinterpret_cast(node) + 128);
		__builtin_prefetch(reinterpret_cast(node) + 192);
	}

	switch(getNodeType()) {
		case hot::commons::NodeType ::SINGLE_MASK_8_BIT_PARTIAL_KEYS:
			return operation(*castToNode(node));
		case hot::commons::NodeType ::SINGLE_MASK_16_BIT_PARTIAL_KEYS:
			return operation(*castToNode(node));
		case hot::commons::NodeType ::SINGLE_MASK_32_BIT_PARTIAL_KEYS:
			return operation(*castToNode(node));
		case hot::commons::NodeType ::MULTI_MASK_8_BYTES_AND_8_BIT_PARTIAL_KEYS:
			return operation(*castToNode(node));
		case hot::commons::NodeType ::MULTI_MASK_8_BYTES_AND_16_BIT_PARTIAL_KEYS:
			return operation(*castToNode(node));
		case hot::commons::NodeType ::MULTI_MASK_8_BYTES_AND_32_BIT_PARTIAL_KEYS:
			return operation(*castToNode(node));
		case hot::commons::NodeType ::MULTI_MASK_16_BYTES_AND_16_BIT_PARTIAL_KEYS:
			return operation(*castToNode(node));
		default: //hot::commons::NodeType ::MULTI_MASK_32_BYTES_AND_32_BIT_PARTIAL_KEYS:
			return operation(*castToNode(node));
	}
}

// Default: a null pointer value (neither a leaf nor a valid node).
inline HOTSingleThreadedChildPointer::HOTSingleThreadedChildPointer() : mPointer(reinterpret_cast(nullptr)) {
}

inline HOTSingleThreadedChildPointer::HOTSingleThreadedChildPointer(HOTSingleThreadedChildPointer const & other)
	: mPointer(other.mPointer)
{
}

// Node pointer: the type code is OR-ed into the address and the result is
// shifted left by one, leaving bit 0 (the leaf flag) cleared.
inline HOTSingleThreadedChildPointer::HOTSingleThreadedChildPointer(hot::commons::NodeType nodeAlgorithmType, HOTSingleThreadedNodeBase const *node)
	: mPointer((reinterpret_cast(node) | static_cast(nodeAlgorithmType)) << 1) {
}

// Leaf: the value is shifted left by one and bit 0 is set as the leaf flag.
inline HOTSingleThreadedChildPointer::HOTSingleThreadedChildPointer(intptr_t leafValue)
	: mPointer((leafValue << 1) | 1) {
}

inline HOTSingleThreadedChildPointer & HOTSingleThreadedChildPointer::operator=(const HOTSingleThreadedChildPointer &other) {
	mPointer = other.mPointer;
	// by convention, always return *this
	return *this;
}

inline bool HOTSingleThreadedChildPointer::operator==(HOTSingleThreadedChildPointer const & other) const {
	return (mPointer == other.mPointer);
}

inline bool HOTSingleThreadedChildPointer::operator!=(HOTSingleThreadedChildPointer const & other) const {
	return (mPointer != other.mPointer);
}

// Deletes the referenced node through its concrete type (children are not touched).
inline void HOTSingleThreadedChildPointer::free() const {
	executeForSpecificNodeType(false, [&](const auto & node) -> void {
		delete &node;
	});
}

// The type-code mask moved to where the code sits inside mPointer (bits 1..3).
constexpr intptr_t NODE_ALGORITH_TYPE_HELPER_EXTRACTION_MASK = NODE_ALGORITHM_TYPE_EXTRACTION_MASK << 1;
inline hot::commons::NodeType HOTSingleThreadedChildPointer::getNodeType() const {
	const unsigned int nodeAlgorithmCode = static_cast(mPointer & NODE_ALGORITH_TYPE_HELPER_EXTRACTION_MASK); // 0b1110
	return static_cast(nodeAlgorithmCode >> 1u);
}

// Undoes the shift applied by the node constructor and clears the low four bits.
inline HOTSingleThreadedNodeBase* HOTSingleThreadedChildPointer::getNode() const {
	intptr_t const nodePointerValue = (mPointer >> 1) & POINTER_EXTRACTION_MASK;
	return reinterpret_cast(nodePointerValue);
}

inline intptr_t HOTSingleThreadedChildPointer::getTid() const {
	// The value stored in the pseudo-leaf
	//normally this is undefined behaviour lookup for intrinsic working only on x86 cpus replace with instruction for arithmetic shift
	return mPointer >> 1;
}

inline bool HOTSingleThreadedChildPointer::isLeaf() const {
	return mPointer & 1;
}

inline bool HOTSingleThreadedChildPointer::isNode() const {
	return !isLeaf();
}

// True for a non-leaf whose stored value differs from the null pointer value.
inline bool HOTSingleThreadedChildPointer::isAValidNode() const {
	return isNode() & (mPointer != reinterpret_cast(nullptr));
}

// True for a non-leaf whose decoded node address is null.
inline bool HOTSingleThreadedChildPointer::isUnused() const {
	return (!isLeaf()) & (getNode() == nullptr);
}

// Leaves report height 0; nodes report their stored mHeight.
inline uint16_t HOTSingleThreadedChildPointer::getHeight() const {
	return isLeaf() ? 0 : getNode()->mHeight;
}

// Forwards the arguments to the concrete node's search(), with prefetching enabled.
template inline __attribute__((always_inline)) auto HOTSingleThreadedChildPointer::search(Args... args) const {
	return executeForSpecificNodeType(true, [&](const auto & node) {
		return node.search(args...);
	});
}

// A leaf counts as one entry; a node reports its own entry count.
inline unsigned int HOTSingleThreadedChildPointer::getNumberEntries() const {
	return isLeaf() ? 1 : getNode()->getNumberEntries();
}

inline std::set HOTSingleThreadedChildPointer::getDiscriminativeBits() const {
	return executeForSpecificNodeType(false, [&](const auto & node) {
		return node.mDiscriminativeBitsRepresentation.getDiscriminativeBits();
	});
}

// Follows the first child pointer at every level until a leaf is reached.
inline HOTSingleThreadedChildPointer HOTSingleThreadedChildPointer::getSmallestLeafValueInSubtree() const {
	return isLeaf() ? *this : getNode()->getPointers()[0].getSmallestLeafValueInSubtree();
}

// Follows the last child pointer at every level until a leaf is reached.
inline HOTSingleThreadedChildPointer HOTSingleThreadedChildPointer::getLargestLeafValueInSubtree() const {
	return isLeaf() ? *this : getNode()->getPointers()[getNode()->getNumberEntries() - 1].getLargestLeafValueInSubtree();
}

// Recursively deletes all child subtrees, then the node itself.
// Leaves and null node pointers are left alone.
inline void HOTSingleThreadedChildPointer::deleteSubtree() {
	if(isNode() && getNode() != nullptr) {
		executeForSpecificNodeType(true, [](auto & node) -> void {
			for(HOTSingleThreadedChildPointer & childPointer : node) {
				childPointer.deleteSubtree();
			}
			delete &node;
		});
	}
}

} }

#endif

--------------------------------------------------------------------------------
/lits/hot_src/include/SIMDHelper.hpp:
--------------------------------------------------------------------------------
#ifndef __HOT__COMMONS__SIMD_HELPER_HPP___
#define __HOT__COMMONS__SIMD_HELPER_HPP___

// NOTE(review): the header names of the three includes below appear to have
// been lost when this dump was produced - restore them from upstream.
#include
#include
#include

namespace hot { namespace commons {

// Maps a register width to the matching SIMD register type.
template struct SIMDRegisterTypeMapper {
};

template<> struct SIMDRegisterTypeMapper<64u> {
	using SIMDRegisterType = __m64;
};

template<> struct
SIMDRegisterTypeMapper<128u> {
	using SIMDRegisterType = __m128i;
};

template<> struct SIMDRegisterTypeMapper<256u> {
	using SIMDRegisterType = __m256i;
};

// Interface of the width-specific SIMD helpers; the specialisations below
// provide the implementations.
// NOTE(review): the template parameter list and the std::array arguments
// appear to have been lost when this dump was produced - confirm against the
// upstream header.
template
struct SIMDHelper {
	using SIMDRegisterType = typename SIMDRegisterTypeMapper::SIMDRegisterType;

	/**
	 * Creates a mask from the most significant bit of each 8-bit element of the input register
	 * @param inputRegister the variable to extract the most significant bits of
	 * @return the created mask
	 */
	static inline uint32_t moveMask8(SIMDRegisterType inputRegister);

	/**
	 * Compares packed 8-bit integers in a and b
	 *
	 * @param a first SIMD register
	 * @param b second SIMD register
	 * @return for each equal packed 8-bit integer the value 0xFF is set in the output register
	 */
	static inline SIMDRegisterType cmpeq_epi8(SIMDRegisterType a, SIMDRegisterType b);

	/**
	 * Broadcasts the 8-bit integer to the SIMD register
	 * @param byte the 8-bit integer to broadcast
	 * @return the created SIMD-register
	 */
	static inline SIMDRegisterType set1_epi8(uint8_t byte);

	/**
	 * Creates a SIMD-register with all bits being set to 0
	 * @return the newly created SIMD-register
	 */
	static inline SIMDRegisterType zero();

	/**
	 * Calculates the bitwise binary and of two SIMD registers
	 *
	 * @param a the first operand
	 * @param b the second operand
	 * @return the resulting binary and of the provided parameters
	 */
	static inline SIMDRegisterType binaryAnd(SIMDRegisterType a, SIMDRegisterType b);

	/**
	 * Calculates the bitwise binary or of two SIMD registers
	 *
	 * @param a the first operand
	 * @param b the second operand
	 * @return the resulting binary or of the provided parameters
	 */
	static inline SIMDRegisterType binaryOr(SIMDRegisterType a, SIMDRegisterType b);

	/**
	 * Calculates the bitwise binary and not of two SIMD registers
	 *
	 * @param a the first operand
	 * @param b the second operand
	 * @return the resulting binary and not of the provided parameters
	 */
	static inline SIMDRegisterType binaryAndNot(SIMDRegisterType a, SIMDRegisterType b);

	/**
	 * Shifts all entries in the SIMD register left by 8 bits
	 * @param a the parameter to shift
	 * @return the shifted SIMD-register
	 */
	static inline SIMDRegisterType shiftLeftOneByte(SIMDRegisterType a);

	/**
	 * Loads the data of an equally sized std array into a SIMD-register
	 * @param array the array to load
	 * @return the loaded SIMD-register
	 */
	static inline SIMDRegisterType toRegister(std::array const & array);

	/**
	 * Loads the data stored at the given address into a SIMD-register
	 * @param data the data to load
	 * @return the loaded SIMD-register
	 */
	static inline SIMDRegisterType toRegister(void const * data);

	/**
	 * Stores the data of the given SIMD register in the provided std array
	 *
	 * @param data the register to store
	 * @param array the target array
	 */
	static inline void store(SIMDRegisterType data, std::array & array);

	/**
	 * Stores the data of the given SIMD register at the specified target location
	 *
	 * @param data the register to store
	 * @param location the target address
	 */
	static inline void store(SIMDRegisterType data, void* location);
};

// 64-bit specialisation: every operation forwards to the corresponding
// __m64 intrinsic.
template<>
struct SIMDHelper<64u> {
	using SIMDRegisterType = typename SIMDRegisterTypeMapper<64>::SIMDRegisterType;

	static inline __attribute__((always_inline)) uint32_t moveMask8(SIMDRegisterType inputRegister) {
		return _mm_movemask_pi8(inputRegister);
	}

	static inline __attribute__((always_inline)) SIMDRegisterType cmpeq_epi8(SIMDRegisterType a, SIMDRegisterType b) {
		return _mm_cmpeq_pi8(a, b);
	}

	static inline __attribute__((always_inline)) SIMDRegisterType set1_epi8(uint8_t byte) {
		return _mm_set1_pi8(byte);
	}

	static inline __attribute__((always_inline)) SIMDRegisterType zero() {
		return _mm_setzero_si64();
	}

	static inline __attribute__((always_inline)) SIMDRegisterType binaryAnd(SIMDRegisterType a, SIMDRegisterType b) {
		return _mm_and_si64(a, b);
	}

	static inline __attribute__((always_inline)) SIMDRegisterType binaryOr(SIMDRegisterType a, SIMDRegisterType b) {
		return _mm_or_si64(a, b);
	}

	static inline __attribute__((always_inline)) SIMDRegisterType binaryAndNot(SIMDRegisterType a, SIMDRegisterType b) {
		return _mm_andnot_si64(a, b);
	}

	static inline __attribute__((always_inline)) SIMDRegisterType shiftLeftOneByte(SIMDRegisterType a) {
		return _mm_slli_si64(a, 8);
	}

	// Loads the first element of the array into the register.
	static inline __attribute__((always_inline)) SIMDRegisterType toRegister(std::array const & array) {
		return _mm_cvtsi64_m64(*array.data());
	}

	static inline __attribute__((always_inline)) SIMDRegisterType toRegister(void const * data) {
		return _mm_cvtsi64_m64(*reinterpret_cast(data));
	}


	// Writes the register content into element 0 of the array.
	static inline __attribute__((always_inline)) void store(SIMDRegisterType data, std::array & array) {
		array[0] = _mm_cvtm64_si64(data);
	}

	static inline __attribute__((always_inline)) void store(SIMDRegisterType data, void* location) {
		*reinterpret_cast(location) = _mm_cvtm64_si64(data);
	};


};

// 128-bit specialisation: forwards to the corresponding __m128i intrinsics.
template<>
struct SIMDHelper<128u> {
	using SIMDRegisterType = typename SIMDRegisterTypeMapper<128>::SIMDRegisterType;

	static inline __attribute__((always_inline)) uint32_t moveMask8(SIMDRegisterType inputRegister) {
		return _mm_movemask_epi8(inputRegister);
	}

| 187 | static inline __attribute__((always_inline))SIMDRegisterType cmpeq_epi8(SIMDRegisterType a, SIMDRegisterType b) { 188 | return _mm_cmpeq_epi8(a, b); 189 | } 190 | 191 | static inline __attribute__((always_inline)) SIMDRegisterType convertWithZeroExtend(SIMDHelper<64u>::SIMDRegisterType sourceRegister) { 192 | return _mm_cvtsi64_si128(_mm_cvtm64_si64(sourceRegister)); 193 | } 194 | 195 | static inline __attribute__((always_inline)) SIMDRegisterType set1_epi8(uint8_t byte) { 196 | return _mm_set1_epi8(byte); 197 | } 198 | 199 | static inline __attribute__((always_inline)) SIMDRegisterType zero() { 200 | return _mm_setzero_si128(); 201 | } 202 | 203 | static inline __attribute__((always_inline)) SIMDRegisterType binaryAnd(SIMDRegisterType a, SIMDRegisterType b) { 204 | return _mm_and_si128(a, b); 205 | } 206 | 207 | static inline __attribute__((always_inline)) SIMDRegisterType binaryOr(SIMDRegisterType a, SIMDRegisterType b) { 208 | return _mm_or_si128(a, b); 209 | } 210 | 211 | static inline __attribute__((always_inline)) SIMDRegisterType binaryAndNot(SIMDRegisterType a, SIMDRegisterType b) { 212 | return _mm_andnot_si128(a, b); 213 | } 214 | 215 | static inline __attribute__((always_inline)) SIMDRegisterType toRegister(std::array const &array) { 216 | return _mm_lddqu_si128(reinterpret_cast(array.data())); 217 | } 218 | 219 | static inline __attribute__((always_inline)) SIMDRegisterType toRegister(void const *rawData) { 220 | return _mm_lddqu_si128(reinterpret_cast(rawData)); 221 | } 222 | 223 | static inline __attribute__((always_inline)) SIMDRegisterType shiftLeftOneByte(SIMDRegisterType a) { 224 | return _mm_bslli_si128(a, 1); 225 | } 226 | 227 | static inline __attribute__((always_inline)) void store(SIMDRegisterType data, std::array &array) { 228 | _mm_storeu_si128(reinterpret_cast(array.data()), data); 229 | } 230 | 231 | static inline __attribute__((always_inline)) void store(SIMDRegisterType data, void *location) { 232 | 
_mm_storeu_si128(reinterpret_cast(location), data); 233 | } 234 | }; 235 | 236 | template<> 237 | struct SIMDHelper<256u> { 238 | using SIMDRegisterType = typename SIMDRegisterTypeMapper<256>::SIMDRegisterType; 239 | 240 | static inline __attribute__((always_inline)) uint32_t moveMask8(SIMDRegisterType inputRegister) { 241 | return _mm256_movemask_epi8(inputRegister); 242 | } 243 | 244 | static inline __attribute__((always_inline)) SIMDRegisterType cmpeq_epi8(SIMDRegisterType a, SIMDRegisterType b) { 245 | return _mm256_cmpeq_epi8(a, b); 246 | } 247 | 248 | static inline __attribute__((always_inline)) SIMDRegisterType convertWithZeroExtend(SIMDHelper<128u>::SIMDRegisterType sourceRegister) { 249 | return _mm256_insertf128_si256(zero(), sourceRegister, 0); 250 | } 251 | 252 | 253 | static inline __attribute__((always_inline)) SIMDRegisterType set1_epi8(uint8_t byte) { 254 | return _mm256_set1_epi8(byte); 255 | } 256 | 257 | static inline __attribute__((always_inline)) SIMDRegisterType set1(uint8_t byte) { 258 | return _mm256_set1_epi8(byte); 259 | } 260 | 261 | static inline __attribute__((always_inline)) SIMDRegisterType set1(uint16_t unsignedShort) { 262 | return _mm256_set1_epi16(unsignedShort); 263 | } 264 | 265 | static inline __attribute__((always_inline)) SIMDRegisterType set1(uint32_t unsigneInt) { 266 | return _mm256_set1_epi32(unsigneInt); 267 | } 268 | 269 | static inline __attribute__((always_inline)) SIMDRegisterType zero() { 270 | return _mm256_setzero_si256(); 271 | } 272 | 273 | /** 274 | * 275 | * @return an 256 bit simd register with all bits set 276 | */ 277 | static inline __attribute__((always_inline)) SIMDRegisterType maxValue() { 278 | SIMDRegisterType zeroRegister = zero(); 279 | return cmpeq_epi8(zeroRegister, zeroRegister); 280 | } 281 | 282 | static inline __attribute__((always_inline)) SIMDRegisterType binaryAnd(SIMDRegisterType a, SIMDRegisterType b) { 283 | return _mm256_and_si256(a, b); 284 | } 285 | 286 | static inline 
__attribute__((always_inline)) SIMDRegisterType binaryOr(SIMDRegisterType a, SIMDRegisterType b) { 287 | return _mm256_or_si256(a, b); 288 | } 289 | 290 | static inline __attribute__((always_inline)) SIMDRegisterType binaryAndNot(SIMDRegisterType a, SIMDRegisterType b) { 291 | return _mm256_andnot_si256(a, b); 292 | } 293 | 294 | static inline __attribute__((always_inline)) SIMDRegisterType toRegister(std::array const & array) { 295 | return _mm256_loadu_si256(reinterpret_cast(array.data())); 296 | } 297 | 298 | static inline __attribute__((always_inline)) SIMDRegisterType toRegister(void const * rawData) { 299 | return _mm256_loadu_si256(reinterpret_cast(rawData)); 300 | } 301 | 302 | /** 303 | * shifts the simd register left by one byte, cf. http://stackoverflow.com/questions/25248766/emulating-shifts-on-32-bytes-with-avx 304 | * 305 | * @param a the parameter to shift 306 | * (the shift distance is fixed to one byte, there is no shift-count parameter) 307 | * @return the shifted SIMD-register 308 | */ 309 | static inline __attribute__((always_inline)) SIMDRegisterType shiftLeftOneByte(SIMDRegisterType a) { 310 | return _mm256_alignr_epi8(a, _mm256_permute2x128_si256(a, a, _MM_SHUFFLE(0, 0, 2, 0)), 16 - 1); 311 | } 312 | 313 | static inline __attribute__((always_inline)) void store(SIMDRegisterType data, std::array & array) { 314 | _mm256_storeu_si256(reinterpret_cast(array.data()), data); 315 | } 316 | 317 | static inline __attribute__((always_inline)) void store(SIMDRegisterType data, void* location) { 318 | _mm256_storeu_si256(reinterpret_cast(location), data); 319 | } 320 | 321 | }; 322 | 323 | }} 324 | 325 | #endif -------------------------------------------------------------------------------- /testbench.cpp: -------------------------------------------------------------------------------- 1 | #include "genId.hpp" 2 | 3 | #include "lits/lits.hpp" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define RESET "\033[0m" 13 | #define RED "\033[31m" 14 | #define GREEN "\033[32m" 15 | #define YELLOW "\033[33m" 16 | #define BLUE "\033[34m" 17 | 18 | typedef enum { 19
| idcards = 0, 20 | randstr = 1, 21 | } strType; 22 | 23 | // Randomly generated keys (idcard) 24 | std::vector keys; 25 | const int default_key_cnt = 2e6; 26 | const int default_search_cnt = 1e6; 27 | const int default_scan_cnt = 1e5; 28 | const int default_scan_range = 100; 29 | 30 | // std::string will store strings with a length less than 16 locally. To ensure 31 | // unified memory access for the data, we store all the data in a buffer. 32 | 33 | // For bulk load 34 | char *bulk_data; 35 | char **bulk_keys; 36 | uint64_t *bulk_vals; 37 | int num_of_bulk; 38 | 39 | // For search 40 | char *search_data; 41 | char **search_keys; 42 | int num_of_search; 43 | 44 | // For insert 45 | char *insert_data; 46 | char **insert_keys; 47 | uint64_t dummy_value = 982; 48 | int num_of_insert; 49 | 50 | void generateKeys(strType t) { 51 | keys = IdGenerator::getKeys(default_key_cnt, t); 52 | } 53 | 54 | void _Myfree(void *&addr) { 55 | if (addr) { 56 | free(addr); 57 | addr = NULL; 58 | } 59 | } 60 | 61 | void freeData() { 62 | _Myfree((void *&)bulk_data); 63 | _Myfree((void *&)bulk_keys); 64 | _Myfree((void *&)bulk_vals); 65 | _Myfree((void *&)search_data); 66 | _Myfree((void *&)search_keys); 67 | _Myfree((void *&)insert_data); 68 | _Myfree((void *&)insert_keys); 69 | } 70 | 71 | void prepareSearchQuerys() { 72 | std::cout << "[Info]: Preparing search queries ..." 
<< std::endl; 73 | 74 | // Use every generated key (100% bulk load) 75 | int key_cnt = static_cast<int>(keys.size()); 76 | uint64_t bulk_byte_size = 0, bulk_ofs = 0; 77 | uint64_t search_byte_size = 0, search_ofs = 0; 78 | 79 | // Prepare the bulk load key number 80 | num_of_bulk = key_cnt; 81 | 82 | for (int i = 0; i < keys.size(); i++) { 83 | bulk_byte_size += keys[i].length() + 1; 84 | } 85 | 86 | // Prepare the bulk load data 87 | bulk_data = new char[bulk_byte_size]; 88 | bulk_keys = new char *[num_of_bulk]; 89 | bulk_vals = new uint64_t[num_of_bulk]; 90 | 91 | // Copy the data 92 | for (int i = 0; i < keys.size(); ++i) { 93 | bulk_keys[i] = bulk_data + bulk_ofs; 94 | bulk_vals[i] = i + 1; 95 | memcpy(bulk_data + bulk_ofs, keys[i].c_str(), keys[i].length() + 1); 96 | bulk_ofs += keys[i].length() + 1; 97 | } 98 | 99 | assert(bulk_ofs == bulk_byte_size); 100 | 101 | // Shuffle, then take the first num_of_search keys for positive search 102 | std::random_shuffle(keys.begin(), keys.end()); 103 | 104 | // Prepare the search key number (never more than the available keys) 105 | num_of_search = default_search_cnt < key_cnt ? default_search_cnt : key_cnt; 106 | 107 | for (int i = 0; i < num_of_search; i++) { 108 | search_byte_size += keys[i].length() + 1; 109 | } 110 | 111 | // Prepare the search data 112 | search_data = new char[search_byte_size]; 113 | search_keys = new char *[num_of_search]; 114 | 115 | // Copy the data 116 | for (int i = 0; i < num_of_search; ++i) { 117 | search_keys[i] = search_data + search_ofs; 118 | memcpy(search_data + search_ofs, keys[i].c_str(), keys[i].length() + 1); 119 | search_ofs += keys[i].length() + 1; 120 | } 121 | 122 | assert(search_ofs == search_byte_size); 123 | } 124 | 125 | void prepareInsertQuerys() { 126 | std::cout << "[Info]: Preparing insert queries ..."
<< std::endl; 127 | 128 | // Use every generated key (50% bulk load, 50% insert) 129 | int key_cnt = static_cast<int>(keys.size()); 130 | uint64_t bulk_byte_size = 0, bulk_ofs = 0; 131 | uint64_t insert_byte_size = 0, insert_ofs = 0; 132 | 133 | // Prepare the bulk load key number 134 | num_of_bulk = key_cnt / 2; 135 | num_of_insert = key_cnt / 2; 136 | 137 | // Randomly shuffle the keys 138 | std::random_shuffle(keys.begin(), keys.end()); 139 | 140 | // Sort the randomly chosen first 50% for bulk load (not the smallest 50%) 141 | std::sort(keys.begin(), keys.begin() + num_of_bulk); 142 | 143 | for (int i = 0; i < num_of_bulk; i++) { 144 | bulk_byte_size += keys[i].length() + 1; 145 | } 146 | for (int i = num_of_bulk; i < num_of_bulk + num_of_insert; i++) { 147 | insert_byte_size += keys[i].length() + 1; 148 | } 149 | 150 | // Prepare the bulk load data 151 | bulk_data = new char[bulk_byte_size]; 152 | bulk_keys = new char *[num_of_bulk]; 153 | bulk_vals = new uint64_t[num_of_bulk]; 154 | 155 | // Prepare the insert data 156 | insert_data = new char[insert_byte_size]; 157 | insert_keys = new char *[num_of_insert]; 158 | 159 | // Copy the data 160 | for (int i = 0; i < num_of_bulk; ++i) { 161 | // Copy the bulk load data 162 | bulk_keys[i] = bulk_data + bulk_ofs; 163 | bulk_vals[i] = i + 1; 164 | memcpy(bulk_data + bulk_ofs, keys[i].c_str(), keys[i].length() + 1); 165 | bulk_ofs += keys[i].length() + 1; 166 | } 167 | for (int i = 0; i < num_of_insert; ++i) { 168 | // Copy the insert data 169 | insert_keys[i] = insert_data + insert_ofs; 170 | memcpy(insert_data + insert_ofs, keys[i + num_of_bulk].c_str(), 171 | keys[i + num_of_bulk].length() + 1); 172 | insert_ofs += keys[i + num_of_bulk].length() + 1; 173 | } 174 | } 175 | 176 | void prepareScanQuerys() { 177 | std::cout << "[Info]: Preparing scan queries ..."
<< std::endl; 178 | 179 | // Use every generated key (100% bulk load) 180 | int key_cnt = static_cast<int>(keys.size()); 181 | uint64_t bulk_byte_size = 0, bulk_ofs = 0; 182 | uint64_t search_byte_size = 0, search_ofs = 0; 183 | 184 | // Prepare the bulk load key number 185 | num_of_bulk = key_cnt; 186 | 187 | for (int i = 0; i < keys.size(); i++) { 188 | bulk_byte_size += keys[i].length() + 1; 189 | } 190 | 191 | // Prepare the bulk load data 192 | bulk_data = new char[bulk_byte_size]; 193 | bulk_keys = new char *[num_of_bulk]; 194 | bulk_vals = new uint64_t[num_of_bulk]; 195 | 196 | // Copy the data 197 | for (int i = 0; i < keys.size(); ++i) { 198 | bulk_keys[i] = bulk_data + bulk_ofs; 199 | bulk_vals[i] = i + 1; 200 | memcpy(bulk_data + bulk_ofs, keys[i].c_str(), keys[i].length() + 1); 201 | bulk_ofs += keys[i].length() + 1; 202 | } 203 | 204 | assert(bulk_ofs == bulk_byte_size); 205 | 206 | // Shuffle, then take the first num_of_search keys as scan start keys 207 | std::random_shuffle(keys.begin(), keys.end()); 208 | 209 | // Prepare the search key number (never more than the available keys) 210 | num_of_search = default_scan_cnt < key_cnt ? default_scan_cnt : key_cnt; 211 | 212 | for (int i = 0; i < num_of_search; i++) { 213 | search_byte_size += keys[i].length() + 1; 214 | } 215 | 216 | // Prepare the search data 217 | search_data = new char[search_byte_size]; 218 | search_keys = new char *[num_of_search]; 219 | 220 | // Copy the data 221 | for (int i = 0; i < num_of_search; ++i) { 222 | search_keys[i] = search_data + search_ofs; 223 | memcpy(search_data + search_ofs, keys[i].c_str(), keys[i].length() + 1); 224 | search_ofs += keys[i].length() + 1; 225 | } 226 | 227 | assert(search_ofs == search_byte_size); 228 | } 229 | 230 | void OutputResult(uint64_t checkSum, int numQuery, double second) { 231 | std::cout << "[Info]: Checksum:\t" << checkSum << std::endl; 232 | std::cout << "[Info]: Query Count:\t" << numQuery << std::endl; 233 | std::cout << "[Info]: Throughput:\t\033[32m" << numQuery / (1e6 * second) 234 | << " Mops\033[0m" << std::endl; 235 | } 236 | 237 | void LITS_Search_test() { 238 |
lits::LITS index; 239 | uint64_t checkSum = 0; 240 | struct timeval tv1, tv2; 241 | double second; 242 | 243 | std::cout << "[Info]: Index bulk loading ... " << std::endl; 244 | 245 | // Bulk load the keys 246 | index.bulkload((const char **)(bulk_keys), (const uint64_t *)(bulk_vals), 247 | num_of_bulk); 248 | 249 | std::cout << "[Info]: Index bulk loaded." << std::endl; 250 | 251 | gettimeofday(&tv1, NULL); 252 | 253 | for (int i = 0; i < num_of_search; ++i) { 254 | checkSum += index.lookup((const char *)(search_keys[i])) ? 1 : 0; 255 | } 256 | 257 | gettimeofday(&tv2, NULL); 258 | 259 | second = tv2.tv_sec - tv1.tv_sec + (tv2.tv_usec - tv1.tv_usec) / 1000000.0; 260 | 261 | OutputResult(checkSum, num_of_search, second); 262 | 263 | index.destroy(); 264 | } 265 | 266 | void LITS_Insert_test() { 267 | lits::LITS index; 268 | uint64_t checkSum = 0; 269 | struct timeval tv1, tv2; 270 | double second; 271 | 272 | std::cout << "[Info]: Index bulk loading ... " << std::endl; 273 | 274 | // Bulk load the keys 275 | index.bulkload((const char **)(bulk_keys), (const uint64_t *)(bulk_vals), 276 | num_of_bulk); 277 | 278 | std::cout << "[Info]: Index bulk loaded." << std::endl; 279 | 280 | gettimeofday(&tv1, NULL); 281 | 282 | for (int i = 0; i < num_of_insert; ++i) { 283 | checkSum += 284 | index.insert((const char *)(insert_keys[i]), dummy_value) ? 1 : 0; 285 | } 286 | 287 | gettimeofday(&tv2, NULL); 288 | 289 | second = tv2.tv_sec - tv1.tv_sec + (tv2.tv_usec - tv1.tv_usec) / 1000000.0; 290 | 291 | OutputResult(checkSum, num_of_insert, second); 292 | 293 | index.destroy(); 294 | } 295 | 296 | void LITS_Scan_test() { 297 | lits::LITS index; 298 | uint64_t checkSum = 0; 299 | struct timeval tv1, tv2; 300 | double second; 301 | 302 | std::cout << "[Info]: Index bulk loading ... 
" << std::endl; 303 | 304 | // Bulk load the keys 305 | index.bulkload((const char **)(bulk_keys), (const uint64_t *)(bulk_vals), 306 | num_of_bulk); 307 | 308 | std::cout << "[Info]: Index bulk loaded." << std::endl; 309 | 310 | gettimeofday(&tv1, NULL); 311 | 312 | for (int i = 0; i < num_of_search; ++i) { 313 | int scan_range = rand() % default_scan_range + 1; 314 | auto iter = index.find((const char *)(search_keys[i])); 315 | for (int i = 0; i < scan_range && iter.not_finish(); ++i) { 316 | checkSum += iter.getKV()->v; 317 | iter.next(); 318 | } 319 | } 320 | 321 | gettimeofday(&tv2, NULL); 322 | 323 | second = tv2.tv_sec - tv1.tv_sec + (tv2.tv_usec - tv1.tv_usec) / 1000000.0; 324 | 325 | OutputResult(checkSum, num_of_search, second); 326 | 327 | index.destroy(); 328 | } 329 | 330 | int main(int argc, char *argv[]) { 331 | srand(time(NULL)); 332 | 333 | if (argc != 3) { 334 | std::cout << "Usage: " << std::endl; 335 | std::cout << argv[0] << " idcards/randstr 1/2/3" << std::endl; 336 | return 0; 337 | } 338 | 339 | int testMode = atoi(argv[2]); 340 | if (testMode < 1 || testMode > 3) { 341 | std::cout << "1: Search-Only Test" << std::endl; 342 | std::cout << "2: Insert-Only Test" << std::endl; 343 | std::cout << "3: Scan-Only Test" << std::endl; 344 | return 0; 345 | } 346 | 347 | if (strcmp(argv[1], "idcards") == 0) 348 | generateKeys(idcards); 349 | else if (strcmp(argv[1], "randstr") == 0) 350 | generateKeys(randstr); 351 | else { 352 | std::cout << "Invalid argument" << std::endl; 353 | return 0; 354 | } 355 | 356 | // Do Search Test 357 | if (testMode == 1) { 358 | std::cout << std::endl; 359 | std::cout << "\033[33m" << "[Search-Only Test] (100% bulk load, " 360 | << default_search_cnt << " random search)" 361 | << "\033[0m" << std::endl; 362 | prepareSearchQuerys(); 363 | LITS_Search_test(); 364 | } 365 | 366 | // Do Insert Test 367 | if (testMode == 2) { 368 | std::cout << std::endl; 369 | std::cout << "\033[33m" 370 | << "[Insert-Only Test] (50% bulk 
load, 50% random insert)" 371 | << "\033[0m" << std::endl; 372 | prepareInsertQuerys(); 373 | LITS_Insert_test(); 374 | } 375 | 376 | // Do Scan Test 377 | if (testMode == 3) { 378 | std::cout << std::endl; 379 | std::cout << "\033[33m" << "[Short Scan Test] (100% bulk load, " 380 | << default_scan_cnt << " random scan)" 381 | << "\033[0m" << std::endl; 382 | prepareScanQuerys(); 383 | LITS_Scan_test(); 384 | } 385 | 386 | // Free the data 387 | freeData(); 388 | } 389 | -------------------------------------------------------------------------------- /lits/hot_src/include/MultiMaskPartialKeyMappingInterface.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __HOT__COMMONS__MULT_MASK_PARTIAL_KEY_MAPPING_INTERFACE___ 2 | #define __HOT__COMMONS__MULT_MASK_PARTIAL_KEY_MAPPING_INTERFACE___ 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "PartialKeyMappingBase.hpp" 11 | #include "SIMDHelper.hpp" 12 | 13 | namespace hot { namespace commons { 14 | 15 | class SingleMaskPartialKeyMapping; 16 | 17 | template class MultiMaskPartialKeyMapping; 18 | 19 | /** 20 | * A partial key mapping which uses an array of byte offsets and a corresponding array of byte masks which is able to extract partial keys consisting of those bits, which are stored in the underlying byte masks 21 | * 22 | * @tparam numberExtractionMasks the number of the underlying 64 bit masks. 1 implies 8 different mask bytes, 2 implies 16 different mask bytes and 4 implies 32 different mask bytes.
23 | */ 24 | template class MultiMaskPartialKeyMapping : public PartialKeyMappingBase { 25 | friend class MultiMaskPartialKeyMapping<1u>; 26 | friend class MultiMaskPartialKeyMapping<2u>; 27 | friend class MultiMaskPartialKeyMapping<4u>; 28 | public: 29 | static constexpr unsigned int MINIMUM_EXTRACTION_BIT_COUNT_SUPPORTED = numberExtractionMasks * 8; 30 | static constexpr unsigned int NUMBER_BITS = numberExtractionMasks * 64; 31 | using SIMDHelperType = SIMDHelper; 32 | using SIMDRegisterType = typename SIMDHelperType::SIMDRegisterType; 33 | using ExtractionDataArray = std::array; 34 | 35 | private: 36 | 37 | uint16_t mNumberExtractionBytes; 38 | uint16_t mNumberKeyBits; 39 | 40 | ExtractionDataArray mExtractionPositions; 41 | ExtractionDataArray mExtractionData; 42 | 43 | inline SIMDRegisterType getPositionsRegister() const; 44 | inline SIMDRegisterType getExtractionDataRegister() const; 45 | private: 46 | inline void setPositions(SIMDRegisterType positions); 47 | inline void setExtractionData(SIMDRegisterType data); 48 | 49 | public: 50 | inline MultiMaskPartialKeyMapping(MultiMaskPartialKeyMapping const & src); 51 | 52 | /** 53 | * Creates a new multi mask partial key mapping from a compatible partial key mapping. 54 | * 55 | * @param src a compatible partial key mapping 56 | */ 57 | inline MultiMaskPartialKeyMapping(typename std::conditional>::type const &src); 58 | 59 | 60 | /** 61 | * Creates a new multi mask partial key mapping by adding a new discriminating bit to an existing single mask partial key mapping. 62 | * 63 | * @param existing the existing partial key mapping to add the new discriminating bit to. 64 | * @param significantKeyInformation the new discriminating bit to add 65 | */ 66 | inline MultiMaskPartialKeyMapping(SingleMaskPartialKeyMapping const &existing, DiscriminativeBit const & significantKeyInformation); 67 | 68 | /** 69 | * Creates a new multi mask partial key mapping by adding a new discriminating bit.
70 | * 71 | * @param existing the existing partial key mapping to add the new discriminating bit to. 72 | * @param discriminatingBit the new discriminating bit to add 73 | */ 74 | inline MultiMaskPartialKeyMapping(MultiMaskPartialKeyMapping const & existing, DiscriminativeBit const & significantKeyInformation, unsigned int extractionByteIndex); 75 | 76 | inline MultiMaskPartialKeyMapping( 77 | MultiMaskPartialKeyMapping const &existing, DiscriminativeBit const &significantKeyInformation, unsigned int const extractionByteIndex, 78 | typename MultiMaskPartialKeyMapping::SIMDRegisterType const & maskForLessSignificantBytes 79 | ); 80 | 81 | template 82 | inline MultiMaskPartialKeyMapping( 83 | MultiMaskPartialKeyMapping const & existing, DiscriminativeBit const &significantKeyInformation, unsigned int const extractionByteIndex, 84 | typename MultiMaskPartialKeyMapping::SIMDRegisterType const & maskForLessSignificantBytes 85 | ); 86 | 87 | template 88 | inline MultiMaskPartialKeyMapping( 89 | MultiMaskPartialKeyMapping const &existing, 90 | uint32_t bytesUsedMask, typename MultiMaskPartialKeyMapping::ExtractionDataArray const & extractionDataUsed, 91 | uint16_t const numberBytesUsed, uint16_t const bitsUsed, 92 | uint16_t const mostSignificantBitIndex, uint16_t const leastSignificantBitIndex 93 | ); 94 | 95 | inline MultiMaskPartialKeyMapping( 96 | uint16_t const numberBytesUsed, uint16_t const bitsUsed, 97 | ExtractionDataArray const & mExtractionPositions, 98 | ExtractionDataArray const & mExtractionData 99 | ); 100 | 101 | protected: 102 | inline MultiMaskPartialKeyMapping( 103 | uint16_t const mostSignificantBitIndex, uint16_t const leastSignificantBitIndex, 104 | uint16_t const numberBytesUsed, uint16_t const bitsUsed 105 | ); 106 | 107 | public: 108 | inline uint32_t extractMask(uint8_t const *keyBytes) const; 109 | 110 | /** 111 | * Given a key information it generates a prefix up to the position represented by the key information and generates a mask 
corresponding to the bits defined in this extraction mask 112 | * 113 | * @param significantKeyInformation a description of the position creating a prefix of (the position is exclusive). eg. if the position is 3 the prefix is 11 114 | * @return the mask containing the bits corresponding to the prefix described the key information 115 | */ 116 | template inline PartialKeyType getPrefixBitsMask(DiscriminativeBit const &significantKeyInformation) const; 117 | 118 | /** 119 | * calculates the number of bits used by this partial key mapping (presumably the number of bits set in the extraction data, TODO confirm against the implementation) 120 | * 121 | * @return the number of bits used 122 | * 123 | */ 124 | inline uint16_t calculateNumberBitsUsed() const; 125 | 126 | /** 127 | * inserts a new discriminating bit into this multi mask mapping. 128 | * The resulting new mask is passed to the provided operation 129 | * 130 | * @tparam Operation a callback which is able to handle the new partial key mapping (can be either of single or multi mask type) 131 | * @param significantKeyInformation the new discriminating bit to insert 132 | * @param operation the operation which process the new discriminative bits mapping 133 | * @return the result of the provided operation 134 | */ 135 | template 136 | inline auto insert(DiscriminativeBit const &significantKeyInformation, Operation const &operation) const; 137 | 138 | /** 139 | * extracts only a subset of the discriminative bits.
140 | * The resulting new partial key mapping is passed to the provided operation 141 | * 142 | * @tparam Operation a callback which is able to handle the new partial key mapping (can only be of type single mask) 143 | * @param bitsUsed a partial key having only those bits set which should be part of the new partial key mapping 144 | * @param operation the operation which process the new discriminative bits mapping 145 | * @return the result of the provided operation 146 | */ 147 | template 148 | inline auto extract(uint32_t bitsUsed, Operation const &operation) const; 149 | 150 | inline std::array createIntermediateKeyWithOnlySignificantBitsSet() const; 151 | 152 | /** 153 | * a helper function which invokes a callback with the this partial key mapping itself and the smallest possible partial key type which is necessary to represent partial keys 154 | * constructued by this partial key mappings. 155 | * 156 | * e.g.: 157 | * + for less or equal then 8 discriminative bits => uint8_t 158 | * + for less or equal then 16 discriminative bits => uint16_t 159 | * + for less or equal then 32 discriminative bits => uint32_t 160 | * 161 | * 162 | * @tparam Operation the type of the callback to invoke 163 | * @param operation the callback 164 | * @return the result of the operation 165 | */ 166 | template inline auto executeWithCorrectMaskAndDiscriminativeBitsRepresentation(Operation const & operation) const; 167 | 168 | /** 169 | * compresses this partial key mapping to the most space efficient partial key mapping which is able to represent the discriminative bits represented by this partial key mapping 170 | * 171 | * @tparam Operation the callback type 172 | * @param operation the callback receiving the compressed partial key mmapings 173 | * @return the result of the operation 174 | */ 175 | template inline auto executeWithCompressedDiscriminativeBitsRepresentation(Operation const & operation) const; 176 | 177 | public: 178 | inline bool hasUnusedBytes() const; 179 | 180 
| private: 181 | inline SIMDRegisterType getMaskForPositionsLargerOrEqualTo(unsigned int bytePosition) const; 182 | 183 | inline void initializeDataAndPositionsWithZero(); 184 | 185 | inline ExtractionDataArray mapInput(uint8_t const __restrict__ *keyBytes) const; 186 | 187 | inline uint32_t extractMaskForMappedInput(ExtractionDataArray const &mappedInputData) const; 188 | 189 | public: 190 | /** 191 | * 192 | * @return a partial key with only the highest bit set 193 | */ 194 | inline uint32_t getMaskForHighestBit() const; 195 | 196 | /** 197 | * Gets a mask with only the bit set specified by the given key information 198 | * 199 | * @param significantKeyInformation the key information to specify the byte to extract and map to the output mask 200 | * @return the mask with only the specified bit set 201 | */ 202 | inline uint32_t getMaskFor(DiscriminativeBit const &significantKeyInformation) const; 203 | 204 | /** 205 | * @return a mask with all mask bits set. This results in a mask like 00111111 where the number of 1s is equal to the number of keybits 206 | */ 207 | inline uint32_t getAllMaskBits() const; 208 | 209 | private: 210 | inline uint32_t getExtractionByteIndexForPosition(uint16_t bytePosition) const; 211 | 212 | /** 213 | * Gets an extraction byte with a single bit set at a position relative to the most significant bit position of the byte 214 | * 215 | * Hence, for byteRelativeBitPosition 0 it returns the byte 0b10000000 and for 7 it returns the byte 0b00000001 216 | * 217 | * @param byteRelativeBitPosition the position of the bit to set relative to the most significant bit position 218 | * @return the byte with the specified bit set 219 | */ 220 | static inline uint8_t getExtractionByteWithBitSetAtRelativePosition(uint16_t byteRelativeBitPosition); 221 | 222 | /** 223 | * Gets the extraction Data for a given extraction Mask 224 | * 225 | * @param usedMaskBits the used masks bits 226 | * 227 | * @return the extraction Bits, which are set according to the
mask bits 228 | */ 229 | inline ExtractionDataArray getUsedExtractionBitsForMask(uint32_t usedMaskBits) const; 230 | 231 | 232 | static inline ExtractionDataArray zeroInitializedArray(); 233 | 234 | static inline uint8_t getExtractionByteAt(ExtractionDataArray const & extractionData, uint32_t extractionByteIndex); 235 | 236 | static inline void setExtractionByteAt(ExtractionDataArray &extractionData, uint32_t extractionByteIndex, uint8_t extractionByte); 237 | 238 | inline uint32_t getMaskForExtractionBytesUsed() const; 239 | 240 | inline uint32_t getBytesUsedMaskForExtractionData(ExtractionDataArray const & extractionData) const; 241 | 242 | inline uint32_t getLeastSignificantBytIndexForBytesUsedMask(uint32_t bytesUsedMask) const; 243 | 244 | public: 245 | /** 246 | * Gets a mask with only the most significan bit set of the given mask, this function is solely need for debugging purpos 247 | * Due to little, big endian differences the extraction information must be considered. 248 | * 249 | * e.g. bit for position 0 might be the 7th bit of the mask. 250 | * With less significant bits stored in the bits 8 till 31 251 | * 252 | * @param mask the mask to extract the most significan bit from 253 | * @return the most significant bit of the given mask. 
254 | */ 255 | template 256 | inline PartialKeyType getMostSignifikantMaskBit(PartialKeyType mask) const; 257 | 258 | /** 259 | * 260 | * @param mask the partial key to inspect 261 | * @return the least significant bit set in the given partial key 262 | */ 263 | inline uint16_t getLeastSignificantBitIndex(uint32_t mask) const; 264 | 265 | inline uint8_t getExtractionBytePosition(unsigned int index) const; 266 | 267 | inline void setExtractionBytePosition(unsigned int index, uint8_t byte); 268 | 269 | inline uint8_t getExtractionByte(unsigned int index) const; 270 | 271 | inline void setExtractionByte(unsigned int index, uint8_t byte); 272 | 273 | /** 274 | * 275 | * @return a set of all the discriminative bit positions represented by this partial key mapping 276 | */ 277 | inline std::set getDiscriminativeBits() const; 278 | 279 | /** 280 | * @return the number of the underlying 8 byte masks which are currently not 0 281 | */ 282 | inline uint16_t getNumberExtractionBytes() const; 283 | }; 284 | 285 | } } 286 | 287 | #endif 288 | -------------------------------------------------------------------------------- /lits/hot_src/include/SingleMaskPartialKeyMapping.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __HOT__COMMONS__SINGLE_MASK_PARTIAL_KEY_MAPPING_HPP___ 2 | #define __HOT__COMMONS__SINGLE_MASK_PARTIAL_KEY_MAPPING_HPP___ 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include "Algorithms.hpp" 10 | #include "DiscriminativeBit.hpp" 11 | 12 | #include "PartialKeyMappingBase.hpp" 13 | #include "SingleMaskPartialKeyMappingInterface.hpp" 14 | #include "MultiMaskPartialKeyMapping.hpp" 15 | 16 | namespace hot { namespace commons { 17 | 18 | constexpr uint64_t SUCCESSIVE_EXTRACTION_MASK_WITH_HIGHEST_BIT_SET = static_cast<uint64_t>(1) << 63; 19 | 20 | inline SingleMaskPartialKeyMapping::SingleMaskPartialKeyMapping(SingleMaskPartialKeyMapping const & src) 21 | : PartialKeyMappingBase() 22 | { 23 | _mm_storeu_si128(reinterpret_cast<__m128i*>(this),
_mm_loadu_si128(reinterpret_cast<__m128i const *>(&src))); 24 | } 25 | 26 | inline SingleMaskPartialKeyMapping::SingleMaskPartialKeyMapping(DiscriminativeBit const & discriminativeBit) 27 | : PartialKeyMappingBase(discriminativeBit.mAbsoluteBitIndex, discriminativeBit.mAbsoluteBitIndex), 28 | mOffsetInBytes(getSuccesiveByteOffsetForMostRightByte(discriminativeBit.mByteIndex)), 29 | mSuccessiveExtractionMask(getSuccessiveMaskForBit(discriminativeBit.mByteIndex, discriminativeBit.mByteRelativeBitIndex)) { 30 | assert(mOffsetInBytes < 255); 31 | } 32 | 33 | inline SingleMaskPartialKeyMapping::SingleMaskPartialKeyMapping( 34 | uint8_t const * extractionBytePositions, 35 | uint8_t const * extractionByteData, 36 | uint32_t const extractionBytesUsedMask, 37 | uint16_t const mostSignificantBitIndex, 38 | uint16_t const leastSignificantBitIndex 39 | ) : PartialKeyMappingBase(mostSignificantBitIndex, leastSignificantBitIndex), 40 | mOffsetInBytes(getSuccesiveByteOffsetForLeastSignificantBitIndex(leastSignificantBitIndex)), 41 | mSuccessiveExtractionMask(getSuccessiveExtractionMaskFromRandomBytes(extractionBytePositions, extractionByteData, extractionBytesUsedMask, mOffsetInBytes)) 42 | { 43 | assert(mOffsetInBytes < 255); 44 | } 45 | 46 | 47 | inline SingleMaskPartialKeyMapping::SingleMaskPartialKeyMapping( 48 | SingleMaskPartialKeyMapping const & existing, 49 | DiscriminativeBit const & discriminatingBit 50 | ) : PartialKeyMappingBase(existing, discriminatingBit), 51 | mOffsetInBytes(getSuccesiveByteOffsetForLeastSignificantBitIndex(mLeastSignificantDiscriminativeBitIndex)), 52 | mSuccessiveExtractionMask( 53 | getSuccessiveMaskForBit(discriminatingBit.mByteIndex, discriminatingBit.mByteRelativeBitIndex) 54 | | (existing.mSuccessiveExtractionMask >> (convertBytesToBits(mOffsetInBytes - existing.mOffsetInBytes))) 55 | ) 56 | { 57 | assert(mOffsetInBytes < 255); 58 | } 59 | 60 | 61 | 62 | inline SingleMaskPartialKeyMapping::SingleMaskPartialKeyMapping( 63 | 
SingleMaskPartialKeyMapping const & existing, 64 | uint32_t const & maskBitsNeeded 65 | ) : SingleMaskPartialKeyMapping(existing, existing.getSuccessiveMaskForMask(maskBitsNeeded)) 66 | { 67 | assert(_mm_popcnt_u32(maskBitsNeeded) >= 1); 68 | assert(mOffsetInBytes < 255); 69 | } 70 | 71 | inline uint16_t SingleMaskPartialKeyMapping::calculateNumberBitsUsed() const { 72 | return _mm_popcnt_u64(mSuccessiveExtractionMask); 73 | } 74 | 75 | template inline PartialKeyType 76 | SingleMaskPartialKeyMapping::getPrefixBitsMask(DiscriminativeBit const &significantKeyInformation) const { 77 | int relativeMissmatchingByteIndex = significantKeyInformation.mByteIndex - mOffsetInBytes; 78 | 79 | //Extracts a bit masks corresponding to 111111111|00000000 with the first zero bit marking the missmatching bit index. 80 | 81 | //PAPER describe that little endian byte order must be respected. 82 | //due to little endian encoding inside the byte the shift direction is reversed 83 | //it has the form 1110000 111111111.... 84 | uint64_t singleBytePrefixMask = ((UINT64_MAX >> significantKeyInformation.mByteRelativeBitIndex) ^ (UINT64_MAX << 56)) * (relativeMissmatchingByteIndex >= 0); 85 | //mask where highest byte (most right byte in little endian) is set to the deleted mask 86 | //results ins a mask like this 010111|1111111111111111...... where the pipe masks the end of the first byte 87 | //in the next step this mask is moved to the right to mask the prefix byte (in little endian move high byte to the right actually means moving high byte to the left hence turning the most right bytes 0 (after the prefix)) 88 | uint64_t subtreeSuccesiveBytesMask = relativeMissmatchingByteIndex > 7 ? 
UINT64_MAX : (singleBytePrefixMask >> ((7 - relativeMissmatchingByteIndex) * 8)); 89 | 90 | 91 | return extractMaskFromSuccessiveBytes(subtreeSuccesiveBytesMask); 92 | } 93 | 94 | template inline auto SingleMaskPartialKeyMapping::insert(DiscriminativeBit const & discriminativeBit, Operation const & operation) const { 95 | bool isSingleMaskPartialKeyMapping = 96 | ((static_cast(discriminativeBit.mByteIndex - getByteIndex(mMostSignificantDiscriminativeBitIndex))) < 8) 97 | & ((static_cast(getByteIndex(mLeastSignificantDiscriminativeBitIndex) - discriminativeBit.mByteIndex)) < 8); 98 | 99 | return isSingleMaskPartialKeyMapping 100 | ? operation(SingleMaskPartialKeyMapping { *this, discriminativeBit }) 101 | : (hasUnusedBytes() 102 | ? operation(MultiMaskPartialKeyMapping<1u> { *this, discriminativeBit }) 103 | : operation(MultiMaskPartialKeyMapping<2u> { *this, discriminativeBit }) 104 | ); 105 | } 106 | 107 | template inline auto SingleMaskPartialKeyMapping::extract(uint32_t bitsUsed, Operation const & operation) const { 108 | assert(_mm_popcnt_u32(bitsUsed) >= 1); 109 | return operation(SingleMaskPartialKeyMapping { *this, bitsUsed } ); 110 | } 111 | 112 | template inline auto SingleMaskPartialKeyMapping::executeWithCorrectMaskAndDiscriminativeBitsRepresentation(Operation const & operation) const { 113 | assert(getMaximumMaskByteIndex(calculateNumberBitsUsed()) <= 3); 114 | switch(getMaximumMaskByteIndex(calculateNumberBitsUsed())) { 115 | case 0: 116 | return operation(*this, static_cast(UINT8_MAX)); 117 | case 1: 118 | return operation(*this, static_cast(UINT16_MAX)); 119 | default: //case 2 + 3 120 | return operation(*this, static_cast(UINT32_MAX)); 121 | } 122 | } 123 | 124 | inline bool SingleMaskPartialKeyMapping::hasUnusedBytes() const { 125 | return getUsedBytesMask() != UINT8_MAX; 126 | } 127 | 128 | inline uint32_t SingleMaskPartialKeyMapping::getUsedBytesMask() const { 129 | __m64 extractionMaskRegister = getRegister(); 130 | return 
_mm_movemask_pi8(_mm_cmpeq_pi8(extractionMaskRegister, _mm_setzero_si64())) ^ UINT8_MAX; 131 | } 132 | 133 | inline uint32_t SingleMaskPartialKeyMapping::getByteOffset() const { 134 | return mOffsetInBytes; 135 | } 136 | 137 | 138 | inline uint8_t SingleMaskPartialKeyMapping::getExtractionByte(unsigned int byteIndex) const { 139 | return reinterpret_cast(&mSuccessiveExtractionMask)[byteIndex]; 140 | } 141 | 142 | inline uint8_t SingleMaskPartialKeyMapping::getExtractionBytePosition(unsigned int byteIndex) const { 143 | return byteIndex + mOffsetInBytes; 144 | } 145 | 146 | inline uint32_t SingleMaskPartialKeyMapping::getMaskForHighestBit() const { 147 | return extractMaskFromSuccessiveBytes(getSuccessiveMaskForAbsoluteBitPosition(mMostSignificantDiscriminativeBitIndex)); 148 | } 149 | 150 | inline uint32_t SingleMaskPartialKeyMapping::getMaskFor(DiscriminativeBit const & discriminativeBit) const { 151 | return extractMaskFromSuccessiveBytes(getSuccessiveMaskForBit(discriminativeBit.mByteIndex, discriminativeBit.mByteRelativeBitIndex)); 152 | } 153 | 154 | inline uint32_t SingleMaskPartialKeyMapping::getAllMaskBits() const { 155 | return _pext_u64(mSuccessiveExtractionMask, mSuccessiveExtractionMask); 156 | } 157 | 158 | inline __attribute__((always_inline)) uint32_t SingleMaskPartialKeyMapping::extractMask(uint8_t const * keyBytes) const { 159 | return extractMaskFromSuccessiveBytes(*reinterpret_cast(keyBytes + mOffsetInBytes)); 160 | } 161 | 162 | inline std::array SingleMaskPartialKeyMapping::createIntermediateKeyWithOnlySignificantBitsSet() const { 163 | std::array intermediateKey; 164 | std::memset(intermediateKey.data(), 0, 256); 165 | std::memmove(intermediateKey.data() + mOffsetInBytes, &mSuccessiveExtractionMask, sizeof(mSuccessiveExtractionMask)); 166 | return intermediateKey; 167 | }; 168 | 169 | inline SingleMaskPartialKeyMapping::SingleMaskPartialKeyMapping( 170 | SingleMaskPartialKeyMapping const & existing, uint64_t const 
newExtractionMaskWithSameOffset 171 | ) : PartialKeyMappingBase( 172 | convertBytesToBits(existing.mOffsetInBytes) + calculateRelativeMostSignificantBitIndex(newExtractionMaskWithSameOffset), 173 | convertBytesToBits(existing.mOffsetInBytes) + calculateRelativeLeastSignificantBitIndex(newExtractionMaskWithSameOffset) 174 | ), 175 | mOffsetInBytes(static_cast(getSuccesiveByteOffsetForLeastSignificantBitIndex(mLeastSignificantDiscriminativeBitIndex))), 176 | mSuccessiveExtractionMask(newExtractionMaskWithSameOffset << (convertBytesToBits(existing.mOffsetInBytes - mOffsetInBytes))) 177 | { 178 | } 179 | 180 | inline __attribute__((always_inline)) uint32_t SingleMaskPartialKeyMapping::extractMaskFromSuccessiveBytes(uint64_t const inputMask) const { 181 | return static_cast( _pext_u64(inputMask, mSuccessiveExtractionMask)); 182 | } 183 | 184 | inline __m64 SingleMaskPartialKeyMapping::getRegister() const { 185 | return _mm_cvtsi64_m64(mSuccessiveExtractionMask); 186 | } 187 | 188 | inline uint64_t SingleMaskPartialKeyMapping::getSuccessiveMaskForBit(uint const bytePosition, uint const byteRelativeBitPosition) const { 189 | return 1ul << convertToIndexOfOtherEndiness(bytePosition - mOffsetInBytes, byteRelativeBitPosition); 190 | } 191 | 192 | inline uint SingleMaskPartialKeyMapping::convertToIndexOfOtherEndiness(uint const maskRelativeBytePosition, uint const byteRelativeBitPosition) { 193 | return (maskRelativeBytePosition * 8) + 7 - byteRelativeBitPosition; 194 | } 195 | 196 | inline uint64_t SingleMaskPartialKeyMapping::getSuccessiveMaskForAbsoluteBitPosition(uint absoluteBitPosition) const { 197 | return getSuccessiveMaskForBit(getByteIndex(absoluteBitPosition), bitPositionInByte(absoluteBitPosition)); 198 | } 199 | 200 | inline uint64_t SingleMaskPartialKeyMapping::getSuccessiveMaskForMask(uint32_t const mask) const { 201 | return _pdep_u64(mask, mSuccessiveExtractionMask); 202 | } 203 | 204 | inline uint 
SingleMaskPartialKeyMapping::getSuccesiveByteOffsetForLeastSignificantBitIndex(uint leastSignificantBitIndex) { 205 | return getSuccesiveByteOffsetForMostRightByte(getByteIndex(leastSignificantBitIndex)); 206 | } 207 | 208 | inline uint16_t SingleMaskPartialKeyMapping::calculateRelativeMostSignificantBitIndex(uint64_t rawExtractionMask) { 209 | assert(rawExtractionMask != 0); 210 | uint64_t reverseMask = __builtin_bswap64(rawExtractionMask); 211 | return __lzcnt64(reverseMask); 212 | } 213 | 214 | inline uint16_t SingleMaskPartialKeyMapping::calculateRelativeLeastSignificantBitIndex(uint64_t rawExtractionMask) { 215 | assert(rawExtractionMask != 0); 216 | return 63 - __tzcnt_u64(__builtin_bswap64(rawExtractionMask)); 217 | } 218 | 219 | inline uint64_t SingleMaskPartialKeyMapping::getSuccessiveExtractionMaskFromRandomBytes( 220 | uint8_t const * extractionBytePositions, 221 | uint8_t const * extractionByteData, 222 | uint32_t extractionBytesUsedMask, 223 | uint32_t const offsetInBytes 224 | ) { 225 | uint64_t successiveExtractionMask = 0ul; 226 | uint8_t* successiveExtractionBytes = reinterpret_cast(&successiveExtractionMask); 227 | while(extractionBytesUsedMask > 0) { 228 | uint extractionByteIndex = __tzcnt_u32(extractionBytesUsedMask); 229 | uint targetExtractionBytePosition = extractionBytePositions[extractionByteIndex] - offsetInBytes; 230 | successiveExtractionBytes[targetExtractionBytePosition] = extractionByteData[extractionByteIndex]; 231 | extractionBytesUsedMask = _blsr_u32(extractionBytesUsedMask); 232 | } 233 | return successiveExtractionMask; 234 | } 235 | 236 | template inline PartialKeyType SingleMaskPartialKeyMapping::getMostSignifikantMaskBit(PartialKeyType mask) const { 237 | uint64_t correspondingSuccessiveExtractionMask = getSuccessiveMaskForMask(mask); 238 | unsigned int byteShiftOffset = __tzcnt_u64(correspondingSuccessiveExtractionMask) & (~0b111); 239 | uint64_t mostSignificantExtractionByteMask = correspondingSuccessiveExtractionMask & 
(static_cast(UINT8_MAX) << byteShiftOffset); //pad -> remove relative bit index 240 | uint64_t extractionMaskWithOnlyMostSignificanMaskBitSet = SUCCESSIVE_EXTRACTION_MASK_WITH_HIGHEST_BIT_SET >> _lzcnt_u64(mostSignificantExtractionByteMask); 241 | return extractMaskFromSuccessiveBytes(extractionMaskWithOnlyMostSignificanMaskBitSet); 242 | } 243 | 244 | inline uint16_t SingleMaskPartialKeyMapping::getLeastSignificantBitIndex(uint32_t partialKey) const { 245 | return convertBytesToBits(mOffsetInBytes) + calculateRelativeLeastSignificantBitIndex(getSuccessiveMaskForMask(partialKey)); 246 | } 247 | 248 | inline std::set SingleMaskPartialKeyMapping::getDiscriminativeBits() const { 249 | uint64_t swapedExtractionMask = __builtin_bswap64(mSuccessiveExtractionMask); 250 | std::set extractionBits; 251 | 252 | while(swapedExtractionMask != 0) { 253 | uint isZero = swapedExtractionMask == 0; 254 | uint notIsZero = 1 - isZero; 255 | 256 | uint16_t bitIndex = (isZero * 63) + (notIsZero * __lzcnt64(swapedExtractionMask)); 257 | uint64_t extractionBit = (1ul << 63) >> bitIndex; 258 | swapedExtractionMask &= (~extractionBit); 259 | 260 | extractionBits.insert(convertBytesToBits(mOffsetInBytes) + bitIndex); 261 | } 262 | 263 | return extractionBits; 264 | } 265 | 266 | } } 267 | 268 | #endif 269 | --------------------------------------------------------------------------------