├── CMakeLists.txt ├── readme.txt ├── README.md ├── zipf.h ├── filmadalru.h ├── main.cpp ├── data.h ├── pwlf.h └── filmadastorage.h /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.21) 2 | project(FILM) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | 6 | add_executable(FILM main.cpp film.h pwlf.h data.h 7 | filmadastorage.h filmadalru.h 8 | ) 9 | 10 | if(MSVC) 11 | set(CMAKE_CXX_FLAGS "/O2 /arch:AVX2 /W1 /EHsc") 12 | elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Intel") 13 | set(CMAKE_CXX_FLAGS "-O3 -xHost") 14 | else() 15 | # clang and gcc 16 | set(CMAKE_CXX_FLAGS "-O0 -march=native -Wall -Wextra") #chao changes O3 to O0 17 | endif() 18 | -------------------------------------------------------------------------------- /readme.txt: -------------------------------------------------------------------------------- 1 | # film 2 | a fully learned index for larger-than-memory databases 3 | 4 | 5 | # 6 | 7 | ### Getting started 8 | FILM can be used as a header-only library. 9 | You will need to compile the program with at least the C++17 standard (e.g., '-std=c++17') in the [CMakeLists.txt] 10 | 11 | In this repository, you can compile and run film with [main.cpp] 12 | 13 | There are some examples in [main.cpp] 14 | -- test_interleave_insert_query: test a workload that interleaves inserts and queries, 15 | -- test_out_of_order_insertion: test out-of-order insertion, controlled by the parameter out_of_order_frac 16 | -- test_query_baseline: test a query workload with a fixed number of point and/or range queries 17 | 18 | 19 | 20 | ### Datasets and data type 21 | You can also run this benchmark on your own dataset. 22 | change the file path in [data.h] 23 | the format of your dataset needs to be either binary or text (one key per line). 24 | the datasets we used in the paper are in text format; binary format is also supported in [data.h] via 'load_binary_data'. 
25 | 26 | the currently supported key data types are long int and double. 27 | to switch between them, modify (typedef double key_type;) or (typedef long int key_type;) in [film.h] 28 | 29 | the header files for datasets and workloads are data.h and zipf.h 30 | 31 | -- 'loaddata' 32 | -- get the searched keys at runtime 33 | ----'get_search_keys_zipf' 34 | ----'get_search_keys' 35 | ----'get_search_keys_scrambledzipf' 36 | ----'get_search_keys_hotspot' 37 | 38 | -- get the searched ranges at runtime 39 | ----'get_search_ranges_zipf' 40 | ----'get_search_ranges' 41 | ----'get_search_ranges_scrambledzipf' 42 | ----'get_search_ranges_hotspot' 43 | 44 | -- 'loadpquery' (pre-generated searched keys) 45 | -- 'loadrquery' (pre-generated searched ranges) 46 | 47 | 48 | the core source files of film are film.h, filmadalru.h, filmadastorage.h, pwlf.h 49 | 50 | -- film.h: the core source file of film 51 | --- test_interleave_insert_query 52 | --- search_one 53 | --- search_range 54 | --- append_one 55 | 56 | -- filmadalru.h: the header file for the adaptive LRU 57 | --- globalchain: hashLRU 58 | --- localchain: localLRU 59 | 60 | -- filmadastorage.h: the header file for data transfer, disk access, cold data eviction 61 | 62 | -- pwlf.h: the header file for subrange partition and building the learned model 63 | --- piece design 64 | --- append segmentation -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # film 2 | a fully learned index for larger-than-memory databases 3 | 4 | 5 | # 6 | 7 | ### Getting started 8 | FILM can be used as a header-only library. 9 | You will need to compile the program with at least the C++17 standard (e.g., '-std=c++17') in the [CMakeLists.txt] 10 | 11 | In this repository, you can compile and run film with [main.cpp] 12 | 13 | There are some examples in [main.cpp] 14 | -- test_interleave_insert_query: test a workload that interleaves inserts 
and queries, 15 | -- test_out_of_order_insertion: test out-of-order insertion, controlled by the parameter out_of_order_frac 16 | -- test_query_baseline: test a query workload with a fixed number of point and/or range queries 17 | 18 | 19 | 20 | ### Datasets and data type 21 | You can also run this benchmark on your own dataset. 22 | change the file path in [data.h] 23 | the format of your dataset needs to be either binary or text (one key per line). 24 | the datasets we used in the paper are in text format; binary format is also supported in [data.h] via 'load_binary_data'. 25 | 26 | the currently supported key data types are long int and double. 27 | to switch between them, modify (typedef double key_type;) or (typedef long int key_type;) in [film.h] 28 | 29 | the header files for datasets and workloads are data.h and zipf.h 30 | 31 | -- 'loaddata' 32 | -- get the searched keys at runtime 33 | ----'get_search_keys_zipf' 34 | ----'get_search_keys' 35 | ----'get_search_keys_scrambledzipf' 36 | ----'get_search_keys_hotspot' 37 | 38 | -- get the searched ranges at runtime 39 | ----'get_search_ranges_zipf' 40 | ----'get_search_ranges' 41 | ----'get_search_ranges_scrambledzipf' 42 | ----'get_search_ranges_hotspot' 43 | 44 | -- 'loadpquery' (pre-generated searched keys) 45 | -- 'loadrquery' (pre-generated searched ranges) 46 | 47 | 48 | the core source files of film are film.h, filmadalru.h, filmadastorage.h, pwlf.h 49 | 50 | -- film.h: the core source file of film 51 | --- test_interleave_insert_query 52 | --- search_one 53 | --- search_range 54 | --- append_one 55 | 56 | -- filmadalru.h: the header file for the adaptive LRU 57 | --- globalchain: hashLRU 58 | --- localchain: localLRU 59 | 60 | -- filmadastorage.h: the header file for data transfer, disk access, cold data eviction 61 | 62 | -- pwlf.h: the header file for subrange partition and building the learned model 63 | --- piece design 64 | --- append segmentation 65 | 66 | 67 | 68 | 69 | 70 | ## the performance of record size 71 
| the model size comparison of different methods in terms of record size. 72 | 73 | 74 | 75 | 76 | ![recordSizewiki_ts_add](https://user-images.githubusercontent.com/51820918/155705150-5a7aa409-503d-4ef0-9e06-ef00f2fc7db8.png) 77 | 78 | ## the range query performance with different amounts of available memory 79 | 80 | 81 | 82 | 83 | ## dataset 84 | the books and wiki_ts datasets come from SOSD. ref: https://github.com/learnedsystems/SOSD 85 | 86 | the optimal solution for generating piecewise-linear functions has been well studied in computational geometry [ref: Joseph O’Rourke. 1981. An on-line algorithm for fitting straight lines between data ranges. Commun. ACM 24, 9 (1981), 574–578.], and the PGM-index implements it in C++ [ref: https://github.com/gvinciguerra/PGM-index]; the learned model of FILM is based on the segmentation from PGM-index. 87 | 88 | -------------------------------------------------------------------------------- /zipf.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT license. 
// from ALEX
// the github address of ALEX: https://github.com/microsoft/ALEX

// Zipf generator, inspired by
// https://github.com/brianfrankcooper/YCSB/blob/master/core/src/main/java/site/ycsb/generator/ScrambledZipfianGenerator.java
// https://github.com/brianfrankcooper/YCSB/blob/master/core/src/main/java/site/ycsb/generator/ZipfianGenerator.java

#ifndef ZIPF_H
#define ZIPF_H

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <random>
#include <stdexcept>

/// Generates Zipf-distributed positions in [0, num_keys) and scatters them
/// over the key space with an FNV-1a hash, so that the popular positions are
/// not clustered at the low indices.
class ScrambledZipfianGenerator {
 public:
  // NOTE(review): presumably the zeta value YCSB precomputes for its fixed
  // item count with theta = 0.99 -- confirm against the YCSB sources.
  static constexpr double ZETAN = 26.46902820178302;
  static constexpr double ZIPFIAN_CONSTANT = 0.99;

  int num_keys_;
  double alpha_;
  double eta_;
  std::mt19937_64 gen_;
  std::uniform_real_distribution<double> dis_;

  /// @param num_keys size of the key space; must be positive.
  /// @throws std::invalid_argument if num_keys <= 0 (nextValue() would
  ///         otherwise take a remainder modulo zero / a wrapped-around value).
  explicit ScrambledZipfianGenerator(int num_keys)
      : num_keys_(num_keys), gen_(std::random_device{}()), dis_(0, 1) {
    if (num_keys <= 0) {
      throw std::invalid_argument(
          "ScrambledZipfianGenerator: num_keys must be positive");
    }
    const double zeta2theta = zeta(2);
    alpha_ = 1. / (1. - ZIPFIAN_CONSTANT);
    eta_ = (1 - std::pow(2. / num_keys_, 1 - ZIPFIAN_CONSTANT)) /
           (1 - zeta2theta / ZETAN);
  }

  /// Draws the next position; the result is always in [0, num_keys_).
  int nextValue() {
    const double u = dis_(gen_);
    const double uz = u * ZETAN;

    int rank;
    if (uz < 1.0) {
      rank = 0;
    } else if (uz < 1.0 + std::pow(0.5, ZIPFIAN_CONSTANT)) {
      rank = 1;
    } else {
      rank = static_cast<int>(num_keys_ *
                              std::pow(eta_ * u - eta_ + 1, alpha_));
    }

    // Scramble the rank; the remainder is taken in unsigned arithmetic, which
    // is what the implicit conversions of `fnv1a(ret) % num_keys_` did before.
    return static_cast<int>(fnv1a(rank) % static_cast<uint32_t>(num_keys_));
  }

  /// Partial sum  sum_{i=1..n} 1 / i^ZIPFIAN_CONSTANT.
  static double zeta(long n) {
    double sum = 0.0;
    for (long i = 0; i < n; i++) {
      sum += 1 / std::pow(i + 1, ZIPFIAN_CONSTANT);
    }
    return sum;
  }

  // FNV hash from https://create.stephan-brumme.com/fnv-hash/
  static constexpr uint32_t PRIME = 0x01000193;  // 16777619
  static constexpr uint32_t SEED = 0x811C9DC5;   // 2166136261

  /// hash a single byte
  static inline uint32_t fnv1a(unsigned char oneByte, uint32_t hash = SEED) {
    return (oneByte ^ hash) * PRIME;
  }

  /// hash a 32 bit integer (four bytes), reading the bytes in memory order
  static inline uint32_t fnv1a(int fourBytes, uint32_t hash = SEED) {
    static_assert(sizeof(int) >= 4, "fnv1a(int) hashes four bytes");
    const unsigned char* bytes =
        reinterpret_cast<const unsigned char*>(&fourBytes);
    for (int i = 0; i < 4; ++i) {
      hash = fnv1a(bytes[i], hash);
    }
    return hash;
  }
};


//
// Created by CCMa on 2022/7/19.
//

#ifndef ZIPFGENRATION_SERIESZIPF_H
#define ZIPFGENRATION_SERIESZIPF_H

/** Zipf-like random distribution.
 *
 * "Rejection-inversion to generate variates from monotone discrete
 * distributions", Wolfgang Hörmann and Gerhard Derflinger
 * ACM TOMACS 6.3 (1996): 169-184
 *
 * Produces integers k in [1, n] with probability proportional to 1 / k^q.
 * NOTE(review): the default template arguments were not visible in the
 * extracted source; `unsigned long` / `double` are assumed -- confirm against
 * the callers in data.h.
 */
template <class IntType = unsigned long, class RealType = double>
class zipf_distribution
{
public:
    typedef RealType input_type;
    typedef IntType result_type;

    static_assert(std::numeric_limits<IntType>::is_integer,
                  "IntType must be an integer type");
    static_assert(!std::numeric_limits<RealType>::is_integer,
                  "RealType must be a floating-point type");

    /** @param n number of elements (values are drawn from [1, n]); n >= 1.
     *  @param q exponent of the distribution.
     */
    zipf_distribution(const IntType n = std::numeric_limits<IntType>::max(),
                      const RealType q = 1.0)
        : n(n)
        , q(q)
        , H_x1(H(1.5) - 1.0)
        , H_n(H(n + 0.5))
        , dist(H_x1, H_n)
    {}

    /** Draws one variate in [1, n] using any uniform random bit generator
     *  (e.g. std::mt19937, std::mt19937_64). */
    template <class Generator>
    IntType operator()(Generator& rng)
    {
        while (true) {
            const RealType u = dist(rng);
            const RealType x = H_inv(u);
            // T is given explicitly: the three arguments have different types
            // and cannot be deduced to a single T.
            const IntType  k = clamp<IntType>(
                static_cast<IntType>(std::round(x)), 1, n);
            if (u >= H(k + 0.5) - h(k)) {
                return k;
            }
        }
    }

private:
    /** Clamp x to [min, max]. */
    template <typename T>
    static constexpr T clamp(const T x, const T min, const T max)
    {
        return std::max(min, std::min(max, x));
    }

    /** (exp(x) - 1) / x, with a series expansion near x == 0. */
    static RealType
    expxm1bx(const RealType x)
    {
        return (std::abs(x) > epsilon)
            ? std::expm1(x) / x
            : (1.0 + x/2.0 * (1.0 + x/3.0 * (1.0 + x/4.0)));
    }

    /** H(x) = log(x) if q == 1, (x^(1-q) - 1)/(1 - q) otherwise.
     * H(x) is an integral of h(x).
     *
     * Note the numerator is one less than in the paper in order to work with
     * all positive q.
     */
    RealType H(const RealType x) const
    {
        const RealType log_x = std::log(x);
        return expxm1bx((1.0 - q) * log_x) * log_x;
    }

    /** log(1 + x) / x, with a series expansion near x == 0. */
    static RealType
    log1pxbx(const RealType x)
    {
        return (std::abs(x) > epsilon)
            ? std::log1p(x) / x
            : 1.0 - x * ((1/2.0) - x * ((1/3.0) - x * (1/4.0)));
    }

    /** The inverse function of H(x) */
    RealType H_inv(const RealType x) const
    {
        // Explicit template argument keeps this valid for every RealType.
        const RealType t = std::max<RealType>(-1.0, x * (1.0 - q));
        return std::exp(log1pxbx(t) * x);
    }

    /** The hat function h(x) = 1 / (x ^ q) */
    RealType h(const RealType x) const
    {
        return std::exp(-q * std::log(x));
    }

    static constexpr RealType epsilon = 1e-8;

    IntType                                  n;     ///< Number of elements
    RealType                                 q;     ///< Exponent
    RealType                                 H_x1;  ///< H(1.5) - 1
    RealType                                 H_n;   ///< H(n + 0.5)
    std::uniform_real_distribution<RealType> dist;  ///< [H_x1, H_n)
};

#endif //ZIPFGENRATION_SERIESZIPF_H


#endif // ZIPF_H
// (repository-dump boundary, kept verbatim) -------------------------------------------------------------------------------- /filmadalru.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by chaochao on 2021/12/23.
3 | // 4 | 5 | #ifndef EXPERIMENTCC12_FILMADALRU_H 6 | #define EXPERIMENTCC12_FILMADALRU_H 7 | 8 | #include 9 | #define MAX_INT (((unsigned int)(-1))>>1) 10 | // define the node in doubly-linked list 11 | using namespace std; 12 | namespace adalru { 13 | template 14 | struct Node { 15 | Key key; 16 | Value value; 17 | Node *prev; 18 | Node *next; 19 | 20 | Node(Key k, Value v) : key(k), value(v), prev(nullptr), next(nullptr) {}; 21 | 22 | Node() : key(), value(), prev(nullptr), next(nullptr) {}; 23 | ~Node(){ 24 | prev = nullptr; 25 | next = nullptr; 26 | } 27 | // Node(): prev(nullptr), next(nullptr){ }; 28 | }; 29 | 30 | template //mapvalue 是指向在 lru 中的node 的指针 在hash map 中使用 31 | class hashLRU { 32 | 33 | public: 34 | int size = 0; 35 | int capacity; 36 | std::unordered_map map; 37 | Node *head; 38 | Node *tail; 39 | 40 | hashLRU(int def_capcity) { 41 | capacity = def_capcity; 42 | head = new Node; 43 | tail = new Node; 44 | head->next = tail; 45 | tail->prev = head; 46 | } 47 | 48 | hashLRU() { 49 | capacity = MAX_INT; 50 | head = new Node; 51 | tail = new Node; 52 | head->next = tail; 53 | tail->prev = head; 54 | } 55 | 56 | 57 | // get the k node in LRU and move it to the head of LRU 58 | Node *get(Key k) { 59 | Node *node; 60 | if (map.find(k) != map.end()) { 61 | node = map[k]; 62 | moveTohead(node); 63 | return node; 64 | }// k is existed in map 65 | else { 66 | return node; 67 | } 68 | 69 | } 70 | 71 | // 将node 插入到LRU 的head 72 | void appendhead(Node *node) { 73 | node->prev = head; 74 | node->next = head->next; 75 | head->next->prev = node; 76 | head->next = node; 77 | } 78 | 79 | // 将 找到的node 移动到head 80 | void moveTohead(Node *node) { 81 | if (node->prev == head) return; 82 | node->prev->next = node->next; 83 | node->next->prev = node->prev; 84 | appendhead(node); 85 | } 86 | 87 | // put the k node into the head of LRU 88 | void put(Key k, Value v) { 89 | if (map.find(k) == map.end()) // k is not existed in map 90 | { 91 | Node *node = new Node(k, v); 92 | 
map[k] = node; 93 | // 判断 size, 如果size = capacity,说明LRU 满了 94 | if (size == capacity) { 95 | poptail(); 96 | } 97 | size += 1; 98 | appendhead(node); 99 | } else { 100 | // map[k]->value = v; 101 | moveTohead(map[k]); 102 | } 103 | } 104 | 105 | //remove the k node from LRU 106 | forceinline void remove(Key k) { 107 | // 首先找到k 所属的node 108 | if (map.find(k) == map.end()) return; // 说明要删除的node 不存在 109 | Node *node = map[k]; 110 | node->prev->next = node->next; 111 | node->next->prev = node->prev; 112 | map.erase(k); 113 | // malloc_trim(0); 114 | size -= 1; 115 | } 116 | 117 | //remove the k node from LRU 118 | forceinline void removenode(Node *node) { 119 | // 首先找到k 所属的node 120 | node->prev->next = node->next; 121 | node->next->prev = node->prev; 122 | if (map.find(node->key) == map.end()){ 123 | cout<< "i need You, my lovely Lord, please come"<< endl; 124 | } 125 | else 126 | map.erase(node->key); 127 | // malloc_trim(0); 128 | size -= 1; 129 | } 130 | 131 | // pop the tail of the LRU, that the least recent used item 132 | forceinline Node *poptail() { 133 | // map.erase(tail->prev->key); 134 | map.erase(tail->prev->key); 135 | Node *node; 136 | node = tail->prev; 137 | tail->prev->prev->next = tail; 138 | tail->prev = tail->prev->prev; 139 | size -= 1; 140 | return node; 141 | } 142 | 143 | //get the tail node from local LRU that from this leaf evict key 144 | Value get_tail() { 145 | auto tailnode = tail->prev; 146 | if (tailnode->value->intrachain.size==1) 147 | removenode(tailnode); 148 | return tailnode->value; 149 | } 150 | 151 | int deletelru(){ 152 | 153 | while (head->next!=NULL){ 154 | auto curnode = head->next; 155 | curnode->prev = head; 156 | head->next = curnode->next; 157 | delete curnode; 158 | } 159 | delete head; 160 | // delete[] tail; 161 | std::unordered_map().swap(map); 162 | malloc_trim(0); 163 | return 0; 164 | } 165 | 166 | }; 167 | 168 | template 169 | class localLRU { 170 | public: 171 | int size = 0; 172 | 173 | Node *head; 174 | Node 
*tail; 175 | 176 | localLRU() { 177 | head = new Node; 178 | tail = new Node; 179 | head->next = tail; 180 | tail->prev = head; 181 | } 182 | void deletelru(){ 183 | while (head->next!=NULL){ 184 | auto curnode = head->next; 185 | curnode->prev = head; 186 | head->next = curnode->next; 187 | // delete []curnode->value; 188 | // curnode->value = NULL; 189 | delete curnode; 190 | curnode = NULL; 191 | } 192 | // malloc_trim(0); 193 | } 194 | 195 | // 将node 插入到LRU 的head 196 | forceinline Node *appendhead(Node *node) { 197 | node->prev = head; 198 | node->next = head->next; 199 | head->next->prev = node; 200 | head->next = node; 201 | } 202 | 203 | // 将 找到的node 移动到head 204 | forceinline void moveTohead(Node *node) { 205 | if (node->prev == head) return; 206 | node->prev->next = node->next; 207 | node->next->prev = node->prev; 208 | appendhead(node); 209 | } 210 | 211 | // put the k node into the head of LRU 212 | forceinline Node* put(Key k, Value v) { 213 | Node *node = new Node(k, v); 214 | size += 1; 215 | appendhead(node); 216 | // if (k > 4294946590) 217 | // cout<< "my Lord ,i need You!"<< endl; 218 | return node; 219 | } 220 | 221 | // pair*> put(Value data) { 222 | // 223 | // Node *node = new Node(data[0], data); 224 | // size += 1; 225 | // appendhead(node); 226 | // return pair*> (true,node); 227 | // } 228 | 229 | 230 | //remove the k node from LRU 231 | forceinline void remove_node(Node *node) { 232 | node->prev->next = node->next; 233 | node->next->prev = node->prev; 234 | delete[] node->value; 235 | delete node; 236 | // malloc_trim(0); 237 | size -= 1; 238 | } 239 | 240 | // modify the offset (key in lru node), 所有 nodes 中,key > pos 都都需要+ 1 241 | forceinline void modify(size_t pos) { 242 | // 遍历 intro chain 243 | // while 循环,直到找到node->key == k; 244 | Node *node = head->next; 245 | while (node != tail) 246 | { 247 | if (node->key > pos) 248 | { 249 | node->key += 1; 250 | } 251 | node = node->next; 252 | } 253 | } 254 | 255 | 256 | 257 | // pop the tail 
of the local LRU, that the least recent used item 258 | forceinline Node *poptail() { 259 | Node *node; 260 | node = tail->prev; 261 | tail->prev->prev->next = tail; 262 | tail->prev = tail->prev->prev; 263 | size -= 1; 264 | return node; 265 | } 266 | 267 | }; 268 | 269 | } 270 | #endif //EXPERIMENTCC12_FILMADALRU_H 271 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * F:\我的坚果云\2021-3-6hybridlearnedindex\ExperimentCC12\filminsert 3 | * path: /home/wamdm/chaohong/clionDir/insertfilm 4 | * support append-only setting 5 | * support query 6 | */ 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | #include 18 | 19 | #include "data.h" 20 | #include "film.h" 21 | 22 | 23 | #define MAX_INT (((unsigned int)(-1))>>1) 24 | 25 | 26 | 27 | int test_interleave_insert_query(string filename, double memThreshold, long int datasize, long int numcolumn, int pagesize,unsigned int errbnd,double insert_frac){ 28 | std::cout << "my Lord, i need You, i trust in You! test film interleave insert (append&out-of-order insert)and query" << std::endl; 29 | struct timeval t1, t2; 30 | double timeuse; 31 | 32 | unsigned long int numkey = ceil(double(datasize*1024*1024)/numcolumn/8); //int datanum 33 | unsigned long actual_numkey = ceil(numkey*1.2); // num_key and actual_numkey, the former is the at least number to be inserted to the index. the latter is the keys to guarantee the numkey 34 | auto keys = new key_type[actual_numkey]; 35 | loaddata(keys, filename,actual_numkey); 36 | std::vector load_data(keys,keys+actual_numkey); 37 | double reserveMem = 10; 38 | memThreshold -= reserveMem; 39 | 40 | double zipf = 0.75; //zipfian factor 41 | string workload = "zipf"; 42 | 43 | std::cout << "my Lord, i need You, i trust in You!!!!!" 
<< std::endl; 44 | gettimeofday(&t1, NULL); 45 | 46 | 47 | cout<<"the data set is "<(datasize)*1024*1024)/static_cast(numcolumn)/8); 92 | auto keys = new key_type[numkey]; 93 | loaddata(keys, filename,numkey); 94 | std::vector load_data(keys,keys+numkey); 95 | delete []keys; 96 | double reserveMem = 5; 97 | memThreshold = memThreshold-reserveMem; 98 | unsigned int queryn = 100000; // 100000 99 | string workload = "zipfrandom"; //"random" 100 | double zipf = 0.75; //zipfian factor 101 | vector load_pointquery ; 102 | vector> load_rangequery; 103 | 104 | 105 | std::cout << "my Lord, i need You, i trust in You!!!!!" << std::endl; 106 | gettimeofday(&t1, NULL); 107 | double zipfs[] = {0.75}; //,0.25,0.5,0.75,1.0,1.25,1.5 108 | string workloads[] = {"zipf"}; ///"zipf","random","zipfrandom","hotspot" 109 | gettimeofday(&t1, NULL); 110 | for (int i = 0; i < (end(workloads)-begin(workloads));i++){ 111 | gettimeofday(&t1, NULL); 112 | cout<<"the data set is "<,key_type>(filename,queryn,numkey,zipf,workload); 119 | load_pointquery = loadpquery(filename,queryn,numkey,zipf,workload); 120 | 121 | filminsert::test_filmadaquery(errbnd,numkey,pagesize,filename,memThreshold,reserveMem,numcolumn,load_data,load_pointquery,load_rangequery,queryn,numkey,datasize,zipf,workload); 122 | 123 | gettimeofday(&t2, NULL); 124 | timeuse = (t2.tv_sec - t1.tv_sec) + (double) (t2.tv_usec - t1.tv_usec) / 1000000.0; 125 | cout << "able o_direct disk access time = " << timeuse << endl; //输出时间(单位:s) 126 | } 127 | 128 | vector ().swap(load_data); 129 | vector ().swap(load_pointquery); 130 | vector >().swap(load_rangequery); 131 | 132 | 133 | 134 | 135 | return 0; 136 | } 137 | 138 | 139 | int test_interleave_baselines(string filename, double memThreshold, long int datasize, long int numcolumn, int pagesize,unsigned short int errbnd){ 140 | std::cout << "my Lord, i need You, i trust in You! 
test the interleave insert&query on baselines" << std::endl; 141 | struct timeval t1, t2; 142 | double timeuse; 143 | 144 | unsigned long int numkey = ceil(double(datasize*1024*1024)/numcolumn/8); //int datanum, 145 | unsigned long actual_numkey = ceil(numkey*1.2); // num_key and actual_numkey, the former is the at least number to be inserted to the index. the latter is the keys to guarantee the numkey 146 | auto keys = new key_type[actual_numkey]; 147 | loaddata(keys, filename,actual_numkey); 148 | std::vector load_data(keys,keys+actual_numkey); 149 | delete []keys; 150 | double reserveMem = 10; 151 | memThreshold = memThreshold-reserveMem; 152 | // string workload = "random"; //"random" 153 | string workloads[] = {"zipf","random","zipfrandom","hotspot"}; ///"zipf","random","zipfrandom","hotspot" 154 | double zipf = 0.75; //zipfian factor 155 | 156 | std::cout << "my Lord, i need You, i trust in You!!!!!" << std::endl; 157 | gettimeofday(&t1, NULL); 158 | unsigned long init_num_key = ceil(numkey*0.75); 159 | 160 | unsigned int batch_size = 100000; //100000 161 | // double insertfracs[] = {0.5}; 162 | gettimeofday(&t1, NULL); 163 | for (int i = 0; i < (end(workloads)-begin(workloads));i++){ 164 | // double insertfrac = insertfracs[i]; 165 | double insertfrac = 0.5; 166 | string workload = workloads[i]; 167 | filminsert::test_interleave_insert_query_baseline(errbnd,numkey,pagesize,filename,memThreshold, 168 | reserveMem,numcolumn,load_data,actual_numkey,datasize,zipf,init_num_key, insertfrac, 0.0, batch_size,workload); 169 | 170 | gettimeofday(&t1, NULL); 171 | cout<<"the data set is "< 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "zipf.h" 20 | 21 | 22 | template 23 | bool load_binary_data(T data[], int length, const std::string& file_path) { 24 | std::ifstream is(file_path.c_str(), std::ios::binary | std::ios::in); 25 | if (!is.is_open()) { 26 | return false; 
27 | } 28 | is.read(reinterpret_cast(data), std::streamsize(length * sizeof(T))); 29 | is.close(); 30 | return true; 31 | } 32 | 33 | template 34 | bool load_text_data(T array[], int length, const std::string& file_path) { 35 | std::ifstream is(file_path.c_str()); 36 | if (!is.is_open()) { 37 | return false; 38 | } 39 | int i = 0; 40 | std::string str; 41 | double a; 42 | while (std::getline(is, str) && i < length) { 43 | std::istringstream ss(str); 44 | ss >> a; 45 | array[i] = a; 46 | ss >> array[i]; 47 | i++; 48 | } 49 | is.close(); 50 | 51 | return true; 52 | 53 | } 54 | 55 | 56 | 57 | /// load the 'dset' dataset, the number of loaded keys is 'numkeys' 58 | template 59 | bool loaddata(type_key data[], std::string dset, size_t numkeys) { 60 | std::string filepath; 61 | //std::vector keys (numkeys); 62 | 63 | if (!dset.compare("books")) { 64 | filepath = "/home/wamdm/chaohong/clionDir/FeasFearCPP/dataset/books.txt"; 65 | load_text_data(data, numkeys, filepath); 66 | } else if (!dset.compare("random0.5")) { 67 | filepath = "/home/wamdm/chaohong/clionDir/FeasFearCPP/dataset/random0.5_83886080.txt"; 68 | load_text_data(data, numkeys, filepath); 69 | } else if (!dset.compare("wiki_ts")) { 70 | filepath = "/home/wamdm/chaohong/clionDir/FeasFearCPP/dataset/wiki_ts_100000000.txt"; 71 | load_text_data(data, numkeys, filepath); 72 | } else if (!dset.compare("astro_ra")) { 73 | filepath = "/home/wamdm/chaohong/clionDir/FeasFearCPP/dataset/astro_ra_18_21_89523861.txt"; 74 | load_text_data(data, numkeys, filepath); 75 | } 76 | else if (!dset.compare("lognormal")){ 77 | filepath = "/home/wamdm/chaohong/dataAchive/ALEX_1D/lognormal-190M.bin.data"; 78 | load_binary_data(data, numkeys, filepath); 79 | } 80 | else if (!dset.compare("longlat")){ 81 | filepath = "/home/wamdm/chaohong/dataAchive/ALEX_1D/longlat-200M.bin.data"; 82 | load_binary_data(data, numkeys, filepath); 83 | } 84 | else if (!dset.compare("longitudes")){ 85 | filepath = 
"/home/wamdm/chaohong/dataAchive/ALEX_1D/longitudes-200M.bin.data"; 86 | load_binary_data(data, numkeys, filepath); 87 | } 88 | else if (!dset.compare("ycsb")){ 89 | filepath = "/home/wamdm/chaohong/dataAchive/ALEX_1D/ycsb-200M.bin.data"; 90 | load_binary_data(data, numkeys, filepath); 91 | } 92 | else { 93 | printf("data name is wrong"); 94 | exit(0); 95 | } 96 | return true; 97 | } 98 | 99 | std::vector split(std::string str, std::string pattern) 100 | { 101 | std::string::size_type pos; 102 | std::vector result; 103 | str += pattern;//扩展字符串以方便操作 104 | int size = str.size(); 105 | for (int i = 0; i < size; i++) 106 | { 107 | pos = str.find(pattern, i); 108 | if (pos < size) 109 | { 110 | std::string s = str.substr(i, pos - i); 111 | result.push_back(s); 112 | i = pos + pattern.size() - 1; 113 | } 114 | } 115 | return result; 116 | } 117 | 118 | template 119 | std::vector loadpquery(std::string queryname, size_t queryn, int datanum, double a,std::string workload) { 120 | std::string filepath; 121 | std::vector queries(queryn); 122 | 123 | std::ostringstream ossb,ossc; 124 | ossb << queryn; 125 | ossc << datanum; 126 | std::string str_a; 127 | if (a==0) 128 | str_a = "0.0"; 129 | else if (a==0.25) 130 | str_a = "0.25"; 131 | else if (a==0.5) 132 | str_a = "0.5"; 133 | else if (a==0.75) 134 | str_a = "0.75"; 135 | else if (a==1.0) 136 | str_a = "1.0"; 137 | else if (a==1.25) 138 | str_a = "1.25"; 139 | else if (a==1.5) 140 | str_a = "1.5"; 141 | else 142 | str_a = "1.25"; 143 | 144 | 145 | // filepath = "/home/wamdm/chaohong/clionDir/FeasFearCPP/txtPQuery/workload" + queryname + "point_Zipf_" + str_a + 146 | // "_" + ossb.str() + "_" + ossc.str() + ".txt"; 147 | if (workload == "zipf"){ 148 | filepath = "/home/wamdm/chaohong/clionDir/FeasFearCPP/txtRQuery/workload"+queryname+"point_Zipf_"+str_a+"_"+ossb.str()+"_"+ossc.str()+".txt"; 149 | } 150 | else if (workload == "random"){ 151 | filepath = 
"/home/wamdm/chaohong/clionDir/FeasFearCPP/txtPQuery/workload"+queryname+"point_Zipf_0.0"+"_"+ossb.str()+"_"+ossc.str()+".txt"; 152 | } 153 | else if (workload == "zipfrandom"){ 154 | filepath = "/home/wamdm/chaohong/clionDir/FeasFearCPP/txtPQuery/shuffle"+queryname+"point_Zipf_0.75"+"_"+ossb.str()+"_"+ossc.str()+".txt"; 155 | } 156 | else { // workload == "hotspot" 157 | filepath = "/home/wamdm/chaohong/clionDir/FeasFearCPP/txtPQuery/workload"+queryname+"point_hotspot_0.15"+"_"+ossb.str()+"_"+ossc.str()+".txt"; 158 | } 159 | std::ifstream fin(filepath); 160 | std::string s; 161 | double tmp; 162 | for (unsigned int i = 0; i < queryn; i++) { 163 | getline(fin, s); 164 | std::istringstream istr1(s); 165 | istr1 >> tmp; 166 | queries[i] = tmp; 167 | } 168 | return queries; 169 | } 170 | 171 | template 172 | std::vector loadrquery(std::string queryname, size_t queryn, int datanum, double a,std::string workload) { 173 | std::string filepath; 174 | std::vector queries(queryn); 175 | 176 | std::ostringstream ossb,ossc; 177 | ossb << queryn; 178 | ossc << datanum; 179 | std::string str_a; 180 | if (a==0) 181 | str_a = "0.0"; 182 | else if (a==0.25) 183 | str_a = "0.25"; 184 | else if (a==0.5) 185 | str_a = "0.5"; 186 | else if (a==0.75) 187 | str_a = "0.75"; 188 | else if (a==1.0) 189 | str_a = "1.0"; 190 | else if (a==1.25) 191 | str_a = "1.25"; 192 | else if (a==1.5) 193 | str_a = "1.5"; 194 | else 195 | str_a = "1.25"; 196 | if (workload == "zipf"){ 197 | filepath = "/home/wamdm/chaohong/clionDir/FeasFearCPP/txtRQuery/workload"+queryname+"range_Zipf_"+str_a+"_"+ossb.str()+"_"+ossc.str()+".txt"; 198 | } 199 | else if (workload == "random"){ 200 | filepath = "/home/wamdm/chaohong/clionDir/FeasFearCPP/txtRQuery/workload"+queryname+"range_Zipf_0.0"+"_"+ossb.str()+"_"+ossc.str()+".txt"; 201 | } 202 | else if (workload == "zipfrandom"){ 203 | filepath = 
"/home/wamdm/chaohong/clionDir/FeasFearCPP/txtRQuery/shuffle"+queryname+"range_Zipf_0.75"+"_"+ossb.str()+"_"+ossc.str()+".txt"; 204 | } 205 | else { // workload == "hotspot" 206 | filepath = "/home/wamdm/chaohong/clionDir/FeasFearCPP/txtRQuery/workload"+queryname+"range_hotspot_0.15"+"_"+ossb.str()+"_"+ossc.str()+".txt"; 207 | } 208 | 209 | std::ifstream fin(filepath); 210 | std::string s; 211 | double tmp; 212 | std::vector tmps(2); 213 | for (unsigned int i = 0; i < queryn; i++) { 214 | getline(fin, s); 215 | 216 | std::vector result = split(s," "); 217 | for (int k =0; k< result.size();k++){ 218 | std::istringstream istr1(result[k]); 219 | 220 | istr1 >> tmp; 221 | tmps[k] = tmp; 222 | queries[i] = tmps; 223 | } 224 | } 225 | 226 | return queries; 227 | } 228 | 229 | template 230 | T* get_search_keys(std::vector array, int num_keys, int num_searches) { 231 | std::mt19937_64 gen(std::random_device{}()); 232 | std::uniform_int_distribution dis(0, num_keys - 1); 233 | auto* keys = new T[num_searches]; 234 | for (int i = 0; i < num_searches; i++) { 235 | int pos = dis(gen); 236 | keys[i] = array[pos]; 237 | } 238 | return keys; 239 | } 240 | 241 | template 242 | T* get_search_keys_scrambledzipf(std::vector array, unsigned long int num_keys, unsigned int num_searches) { 243 | auto* keys = new T[num_searches]; 244 | ScrambledZipfianGenerator zipf_gen(num_keys); 245 | for (unsigned int i = 0; i < num_searches; i++) { 246 | unsigned int pos = zipf_gen.nextValue(); 247 | keys[i] = array[pos]; 248 | } 249 | return keys; 250 | } 251 | 252 | template 253 | T* get_search_keys_zipf(std::vector array, unsigned long int num_keys, unsigned long int num_searches, double zipf_factor) { 254 | 255 | auto* keys = new T[num_searches]; 256 | zipf_distribution zipf_gen(num_keys,zipf_factor); 257 | std::mt19937 gen_; 258 | std::vector zipf_res; 259 | for (unsigned int i = 0; i< num_searches; i ++){ 260 | unsigned int pos = num_keys - zipf_gen.operator()(gen_); 261 | keys[i] = array[pos]; 
262 | } 263 | return keys; 264 | } 265 | 266 | 267 | template 268 | T* get_search_keys_hotspot(std::vector array, unsigned long int num_keys, unsigned long int num_searches, double hotratio=0.2,double accessratio = 0.9) { 269 | unsigned int hotspotlen = num_keys * hotratio; 270 | unsigned int hotqueryn = num_searches*accessratio; 271 | unsigned int randomqueryn = num_searches - hotqueryn; 272 | auto* keys = new T[num_searches]; 273 | zipf_distribution zipf_gen(num_keys,0.75); 274 | std::mt19937_64 gen_random(std::random_device{}()); 275 | std::mt19937 gen_; 276 | std::vector hospot_res; 277 | unsigned int hot_start = num_keys - zipf_gen.operator()(gen_); 278 | while (num_keys-hot_start < hotspotlen) 279 | hot_start = hot_start-hotspotlen; 280 | std::uniform_int_distribution dis1(hot_start, hot_start+hotspotlen); 281 | for (unsigned int i = 0; i< hotqueryn; i ++){ 282 | unsigned int pos = dis1(gen_random); 283 | keys[i] = array[pos]; 284 | } 285 | 286 | std::uniform_int_distribution dis2(0, num_keys - 1); 287 | for (unsigned int i = 0;i 298 | T** get_search_ranges(std::vector array, int num_keys, int num_searches,int minlen = 0,int maxlen =100) { 299 | std::mt19937_64 gen(std::random_device{}()); 300 | std::uniform_int_distribution dis(0, num_keys - maxlen); 301 | std::mt19937_64 gen_random(std::random_device{}()); 302 | std::uniform_int_distribution disrange(minlen, maxlen); 303 | auto* ranges = new T*[num_searches]; 304 | for (int i = 0; i < num_searches; i++) { 305 | unsigned int pos = dis(gen); 306 | ranges[i] = new T[2]; 307 | ranges[i][0] = array[pos]; 308 | ranges[i][1] = array[pos+disrange(gen_random)]; 309 | } 310 | return ranges; 311 | } 312 | 313 | 314 | template 315 | T** get_search_ranges_zipf(std::vector array, unsigned long int num_keys, unsigned long int num_searches, double zipf_factor,unsigned int minlen = 0,unsigned int maxlen=100) { 316 | 317 | auto* ranges = new T*[num_searches]; 318 | long int upper = num_keys-maxlen; 319 | zipf_distribution 
zipf_gen(upper,zipf_factor); 320 | std::mt19937 gen_; 321 | std::uniform_int_distribution disrange(minlen, maxlen); 322 | std::mt19937_64 gen_random(std::random_device{}()); 323 | for (int i = 0; i< num_searches; i ++){ 324 | unsigned int pos = upper - zipf_gen.operator()(gen_); 325 | ranges[i] = new T[2]; 326 | ranges[i][0] = array[pos]; 327 | unsigned int pos2 = pos+disrange(gen_random); 328 | // if (pos2>num_keys-1){ 329 | // std::cout<< "my Lord, i need You!"<< std::endl; 330 | // } 331 | ranges[i][1] = array[pos2]; 332 | } 333 | return ranges; 334 | } 335 | 336 | template 337 | T** get_search_ranges_scrambledzipf(std::vector array, unsigned long int num_keys, unsigned int num_searches,int minlen = 0,int maxlen=100) { 338 | 339 | auto* ranges = new T*[num_searches]; 340 | ScrambledZipfianGenerator zipf_gen(num_keys-maxlen); 341 | std::mt19937 gen_; 342 | std::uniform_int_distribution disrange(minlen, maxlen); 343 | std::mt19937_64 gen_random(std::random_device{}()); 344 | for (unsigned int i = 0; i< num_searches; i ++){ 345 | unsigned int pos = zipf_gen.nextValue(); 346 | ranges[i] = new T[2]; 347 | ranges[i][0] = array[pos]; 348 | ranges[i][1] = array[pos+disrange(gen_random)]; 349 | } 350 | return ranges; 351 | } 352 | 353 | 354 | template 355 | T** get_search_ranges_hotspot(std::vector array, unsigned long int num_keys, unsigned long int num_searches, double hotratio=0.2,double accessratio = 0.9,int minlen = 0,int maxlen=100) { 356 | unsigned int hotspotlen = num_keys * hotratio; 357 | unsigned int hotqueryn = num_searches*accessratio; 358 | unsigned int randomqueryn = num_searches - hotqueryn; 359 | auto* ranges = new T*[num_searches]; 360 | zipf_distribution zipf_gen(num_keys,0.5); 361 | std::mt19937_64 gen_random(std::random_device{}()); 362 | std::mt19937 gen_; 363 | 364 | unsigned int hot_start = num_keys - zipf_gen.operator()(gen_); 365 | while (num_keys-hot_start< hotspotlen ) 366 | hot_start = hot_start-hotspotlen; 367 | while 
(hot_start+hotspotlen+maxlen> num_keys ) 368 | hot_start = hot_start-2*maxlen; 369 | std::uniform_int_distribution dis1(hot_start, hot_start+hotspotlen); 370 | std::uniform_int_distribution disrange(minlen, maxlen); 371 | for (unsigned int i = 0; i< hotqueryn; i ++){ 372 | unsigned int pos = dis1(gen_random); 373 | if (poshot_start+hotspotlen) 376 | std::cout<<"i need You, my lovely Lord"< dis2(0, num_keys - maxlen); 384 | for (unsigned int i = 0;i 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include 19 | #include 20 | #include "film.h" 21 | 22 | #define forceinline inline __attribute__((__always_inline__)) 23 | using namespace std; 24 | namespace filminsert::internal{ 25 | // std::conditional_t< 条件std::is_floating_point_v, 条件为真 则 long double,条件为假 则 std::conditional_t<(sizeof(T) < 8), int64_t, __int128>>; 26 | template // std::is_floating_point_v 检查T 是否为浮点类型,是则为true, else false 27 | using LargeSigned = typename std::conditional_t, 28 | long double, 29 | std::conditional_t<(sizeof(T) <8), int64_t, __int128>>; // chaochao modify "< " to <= 30 | 31 | template 32 | class insertPWLF{ 33 | using SX = LargeSigned; 34 | using SY = LargeSigned; 35 | struct Slope { 36 | SX dx{}; 37 | SY dy{}; 38 | 39 | bool operator<(const Slope &p) const { return dy * p.dx < dx * p.dy; } // operator 运算符重载 40 | bool operator>(const Slope &p) const { return dy * p.dx > dx * p.dy; } 41 | bool operator==(const Slope &p) const { return dy * p.dx == dx * p.dy; } 42 | bool operator!=(const Slope &p) const { return dy * p.dx != dx * p.dy; } 43 | explicit operator long double() const { return dy / (long double) dx; } 44 | }; 45 | 46 | struct Point { 47 | X x{}; 48 | Y y{}; 49 | 50 | Slope operator-(const Point &p) const { return {SX(x) - p.x, SY(y) - p.y}; } 51 | }; 52 | 53 | public: 54 | const Y epsilon; 55 | std::vector lower; 56 | std::vector upper; 57 | X first_x = 0; 58 | X last_x = 0; 59 | unsigned long lower_start = 0; 60 | unsigned long 
upper_start = 0; 61 | unsigned long points_in_hull = 0; 62 | Point rectangle[4]; 63 | queue< Point > buffqueue; 64 | 65 | 66 | 67 | auto cross(const Point &O, const Point &A, const Point &B) const { 68 | auto OA = A - O; 69 | auto OB = B - O; 70 | 71 | // long int tmp = OA.dx * OB.dy - OA.dy * OB.dx; 72 | // std::cout<< tmp <().swap(upper); 95 | std::vector().swap(lower); 96 | // delete [] rectangle; 97 | queue< Point >().swap(buffqueue); 98 | malloc_trim(0); 99 | } 100 | 101 | bool add_point(const X &x, const Y &y) { 102 | // if (points_in_hull > 0 && x <= last_x) 103 | // throw std::logic_error("Points must be increasing by x."); 104 | 105 | last_x = x; 106 | auto max_y = std::numeric_limits::max(); 107 | auto min_y = std::numeric_limits::lowest(); 108 | Point p1{x, y >= max_y - epsilon ? max_y : y + epsilon}; 109 | Point p2{x, y <= min_y + epsilon ? min_y : y - epsilon}; 110 | 111 | if (points_in_hull == 0) { 112 | first_x = x; 113 | rectangle[0] = p1; 114 | rectangle[1] = p2; 115 | upper.clear(); 116 | lower.clear(); 117 | upper.push_back(p1); 118 | lower.push_back(p2); 119 | upper_start = lower_start = 0; 120 | ++points_in_hull; 121 | return true; 122 | } 123 | 124 | if (points_in_hull == 1) { 125 | rectangle[2] = p2; 126 | rectangle[3] = p1; 127 | upper.push_back(p1); 128 | lower.push_back(p2); 129 | ++points_in_hull; 130 | return true; 131 | } 132 | 133 | auto slope1 = rectangle[2] - rectangle[0]; 134 | auto slope2 = rectangle[3] - rectangle[1]; 135 | bool outside_line1 = p1 - rectangle[2] < slope1; 136 | bool outside_line2 = p2 - rectangle[3] > slope2; 137 | 138 | if (outside_line1 || outside_line2) { 139 | points_in_hull = 0; 140 | return false; 141 | } 142 | 143 | if (p1 - rectangle[1] < slope2) { 144 | // Find extreme slope 145 | auto min = lower[lower_start] - p1; 146 | auto min_i = lower_start; 147 | for (auto i = lower_start + 1; i < lower.size(); i++) { 148 | auto val = lower[i] - p1; 149 | if (val > min) 150 | break; 151 | min = val; 152 | min_i = 
i; 153 | } 154 | 155 | rectangle[1] = lower[min_i]; 156 | rectangle[3] = p1; 157 | lower_start = min_i; 158 | 159 | // Hull update 160 | auto end = upper.size(); 161 | for (; end >= upper_start + 2 && cross(upper[end - 2], upper[end - 1], p1) <= 0; --end) 162 | continue; 163 | upper.resize(end); 164 | upper.push_back(p1); 165 | } 166 | 167 | if (p2 - rectangle[0] > slope1) { 168 | // Find extreme slope 169 | auto max = upper[upper_start] - p2; 170 | auto max_i = upper_start; 171 | for (auto i = upper_start + 1; i < upper.size(); i++) { 172 | auto val = upper[i] - p2; 173 | if (val < max) 174 | break; 175 | max = val; 176 | max_i = i; 177 | } 178 | 179 | rectangle[0] = upper[max_i]; 180 | rectangle[2] = p2; 181 | upper_start = max_i; 182 | 183 | // Hull update 184 | auto end = lower.size(); 185 | for (; end >= lower_start + 2 && cross(lower[end - 2], lower[end - 1], p2) >= 0; --end) 186 | continue; 187 | lower.resize(end); 188 | lower.push_back(p2); 189 | } 190 | 191 | ++points_in_hull; 192 | return true; 193 | } 194 | 195 | forceinline bool append_point(const X &x, const Y &y) { 196 | // if (points_in_hull > 0 && x <= last_x) 197 | // throw std::logic_error("Points must be increasing by x."); 198 | 199 | last_x = x; 200 | auto max_y = std::numeric_limits::max(); 201 | auto min_y = std::numeric_limits::lowest(); 202 | Point p1{x, y >= max_y - epsilon ? max_y : y + epsilon}; 203 | Point p2{x, y <= min_y + epsilon ? 
min_y : y - epsilon}; 204 | 205 | if (points_in_hull < 2) { 206 | buffqueue.push(p1); 207 | buffqueue.push(p2); 208 | if (buffqueue.size() == 4){ 209 | upper_start = lower_start = 0; 210 | rectangle[0] = buffqueue.front(); 211 | first_x = rectangle[0].x; 212 | buffqueue.pop(); 213 | rectangle[1] = buffqueue.front(); 214 | buffqueue.pop(); 215 | rectangle[3] = buffqueue.front(); 216 | buffqueue.pop(); 217 | rectangle[2] = buffqueue.front(); 218 | buffqueue.pop(); 219 | upper.push_back(rectangle[0]); 220 | lower.push_back(rectangle[1]); 221 | upper.push_back(rectangle[3]); 222 | lower.push_back(rectangle[2] ); 223 | points_in_hull ++; 224 | } 225 | else { 226 | points_in_hull ++; 227 | } 228 | // else{ 229 | // cout<< "my Lord, i need You! please have pity on me!!" << endl; 230 | // } 231 | 232 | return true; 233 | } 234 | 235 | // if (points_in_hull == 1) { 236 | // rectangle[2] = p2; 237 | // rectangle[3] = p1; 238 | // upper.push_back(p1); 239 | // lower.push_back(p2); 240 | // ++points_in_hull; 241 | // return true; 242 | // } 243 | 244 | auto slope1 = rectangle[2] - rectangle[0]; 245 | auto slope2 = rectangle[3] - rectangle[1]; 246 | bool outside_line1 = p1 - rectangle[2] < slope1; 247 | bool outside_line2 = p2 - rectangle[3] > slope2; 248 | 249 | if (outside_line1 || outside_line2) { 250 | points_in_hull = 0; 251 | vector().swap(lower); 252 | vector().swap(upper); 253 | lower_start = 0; 254 | upper_start = 0; 255 | return false; 256 | } 257 | 258 | if (p1 - rectangle[1] < slope2) { 259 | // Find extreme slope 260 | auto min = lower[lower_start] - p1; 261 | auto min_i = lower_start; 262 | for (auto i = lower_start + 1; i < lower.size(); i++) { 263 | auto val = lower[i] - p1; 264 | if (val > min) 265 | break; 266 | min = val; 267 | min_i = i; 268 | } 269 | 270 | rectangle[1] = lower[min_i]; 271 | rectangle[3] = p1; 272 | lower_start = min_i; 273 | 274 | // Hull update 275 | auto end = upper.size(); 276 | for (; end >= upper_start + 2 && cross(upper[end - 2], 
upper[end - 1], p1) <= 0; --end) 277 | continue; 278 | upper.resize(end); 279 | upper.push_back(p1); 280 | } 281 | 282 | if (p2 - rectangle[0] > slope1) { 283 | // Find extreme slope 284 | auto max = upper[upper_start] - p2; 285 | auto max_i = upper_start; 286 | for (auto i = upper_start + 1; i < upper.size(); i++) { 287 | auto val = upper[i] - p2; 288 | if (val < max) 289 | break; 290 | max = val; 291 | max_i = i; 292 | } 293 | 294 | rectangle[0] = upper[max_i]; 295 | rectangle[2] = p2; 296 | upper_start = max_i; 297 | 298 | // Hull update 299 | auto end = lower.size(); 300 | for (; end >= lower_start + 2 && cross(lower[end - 2], lower[end - 1], p2) >= 0; --end) 301 | continue; 302 | lower.resize(end); 303 | lower.push_back(p2); 304 | } 305 | 306 | ++points_in_hull; 307 | return true; 308 | } 309 | 310 | 311 | forceinline CanonicalSegment get_segment() { 312 | if (points_in_hull == 1) 313 | return CanonicalSegment(rectangle[0], rectangle[1], first_x); 314 | return CanonicalSegment(rectangle, first_x); 315 | } 316 | 317 | forceinline CanonicalSegment get_segment( X break_x) { 318 | if (points_in_hull == 1) 319 | return CanonicalSegment(rectangle[0], rectangle[1], first_x); 320 | return CanonicalSegment(rectangle, first_x,break_x); 321 | } 322 | 323 | forceinline CanonicalSegment get_segment( X break_x,std::vector keys_vec) { 324 | if (points_in_hull == 1) 325 | return CanonicalSegment(rectangle[0], rectangle[1], first_x); 326 | return CanonicalSegment(rectangle, first_x,break_x,keys_vec); 327 | } 328 | 329 | 330 | void reset() { 331 | points_in_hull = 0; 332 | vector().swap(lower); 333 | vector().swap(upper); 334 | lower.clear(); 335 | upper.clear(); 336 | } 337 | }; 338 | 339 | template 340 | class insertPWLF::CanonicalSegment { 341 | friend class insertPWLF; 342 | 343 | 344 | public: 345 | std::vector slotkey; 346 | Point rectangle[4]; 347 | X first; 348 | X last; 349 | 350 | CanonicalSegment( X first) : first(first) {}; 351 | CanonicalSegment(const Point &p0, 
const Point &p1, X first) : rectangle{p0, p1, p0, p1}, first(first) {}; 352 | 353 | CanonicalSegment(const Point &p0, const Point &p1, X first, X last) : rectangle{p0, p1, p0, p1}, first(first) ,last(last) {}; 354 | 355 | CanonicalSegment(const Point (&rectangle)[4], X first) 356 | : rectangle{rectangle[0], rectangle[1], rectangle[2], rectangle[3]}, first(first) {}; 357 | 358 | CanonicalSegment(const Point (&rectangle)[4], X first, X last) 359 | : rectangle{rectangle[0], rectangle[1], rectangle[2], rectangle[3]}, first(first), last(last) {}; 360 | 361 | 362 | 363 | forceinline bool one_point() const { 364 | return rectangle[0].x == rectangle[2].x && rectangle[0].y == rectangle[2].y 365 | && rectangle[1].x == rectangle[3].x && rectangle[1].y == rectangle[3].y; 366 | } 367 | 368 | public: 369 | 370 | CanonicalSegment() = default; 371 | 372 | X get_first_x() const { return first; } 373 | X get_last_x() const { return last; } 374 | 375 | forceinline std::pair get_intersection() const { 376 | auto &p0 = rectangle[0]; 377 | auto &p1 = rectangle[1]; 378 | auto &p2 = rectangle[2]; 379 | auto &p3 = rectangle[3]; 380 | auto slope1 = p2 - p0; 381 | auto slope2 = p3 - p1; 382 | 383 | if (one_point() || slope1 == slope2) 384 | return {p0.x, p0.y}; 385 | 386 | auto p0p1 = p1 - p0; 387 | auto a = slope1.dx * slope2.dy - slope1.dy * slope2.dx; 388 | auto b = (p0p1.dx * slope2.dy - p0p1.dy * slope2.dx) / static_cast(a); 389 | auto i_x = p0.x + b * slope1.dx; 390 | auto i_y = p0.y + b * slope1.dy; 391 | return {i_x, i_y}; 392 | } 393 | 394 | forceinline std::pair get_floating_point_segment(const X &origin) const { 395 | if (one_point()) 396 | return {0, (rectangle[0].y + rectangle[1].y) / 2}; 397 | 398 | if constexpr (std::is_integral_v && std::is_integral_v) { 399 | auto slope = rectangle[3] - rectangle[1]; 400 | auto intercept_n = slope.dy * (SX(origin) - rectangle[1].x); 401 | auto intercept_d = slope.dx; 402 | auto rounding_term = ((intercept_n < 0) ^ (intercept_d < 0) ? 
-1 : +1) * intercept_d / 2; 403 | auto intercept = (intercept_n + rounding_term) / intercept_d + rectangle[1].y; 404 | return {static_cast(slope), intercept}; 405 | } 406 | 407 | auto[i_x, i_y] = get_intersection(); 408 | auto[min_slope, max_slope] = get_slope_range(); 409 | auto slope = (min_slope + max_slope) / 2.; 410 | auto intercept = i_y - (i_x - origin) * slope; 411 | return {slope, intercept}; 412 | } 413 | 414 | forceinline std::pair get_slope_range() const { 415 | if (one_point()) 416 | return {0, 1}; 417 | 418 | auto min_slope = static_cast(rectangle[2] - rectangle[0]); 419 | auto max_slope = static_cast(rectangle[3] - rectangle[1]); 420 | return {min_slope, max_slope}; 421 | } 422 | }; 423 | 424 | 425 | template 426 | std::pair > append_segmentation(size_t error,std::vector keys, 427 | filmtype *filmada,unsigned int k){ 428 | size_t c = 0; 429 | std::vector startkeys; 430 | 431 | for (size_t i = 0; i < keys.size(); ++i) { 432 | pair p(keys[i],filmada->innerlevels[k]->nextpos++) ; // i 为 pos 433 | ++(filmada->innerlevels[k]->pos); 434 | if (!filmada->innerlevels[k]->opt->add_point(p.first, p.second)) { // 如果inner level 不满足error 了,那么再创建一个innerpiece 435 | 436 | // 当前 innerpiece 不再满足,需要创建new inner piece 并判断该 innerlevel 的上一层level 是否需要更新 437 | auto a = filmada->innerlevels[k]->opt->get_segment(); 438 | 439 | if (filmada->innerlevels[k]->pos > 2) 440 | filmada->innerlevels[k]->innerpieces.pop_back(); 441 | filmada->innerlevels[k]->innerpieces.emplace_back(a); 442 | // 首先在该层创建一个 new innerpiece, 更新该innerpiece,再递归向向上 443 | 444 | filmada->innerlevels[k]->pos = 0; 445 | filmada->innerlevels[k]->nextpos -= 2; 446 | delete filmada->innerlevels[k]->opt; 447 | // filmada->innerlevels[k]->opt.pop_back(); 448 | insertPWLF *inneropt = new insertPWLF(error); 449 | filmada->innerlevels[k]->opt=inneropt; 450 | if (k==0){ 451 | // auto aaaaa = filmada->leaflevel.leafpieces.size()-2; 452 | 
startkeys.emplace_back(filmada->leaflevel.leafpieces[filmada->leaflevel.leafpieces.size()-1]->startkey); 453 | } 454 | 455 | else{ 456 | // auto aaaaa = filmada->innerlevels[k-1]->innerpieces.size()-2; 457 | startkeys.emplace_back(filmada->innerlevels[k-1]->innerpieces[filmada->innerlevels[k-1]->innerpieces.size()-2].startkey); 458 | } 459 | startkeys.emplace_back(p.first); 460 | auto rr = append_segmentation(error,startkeys,filmada,k); 461 | 462 | ++c; 463 | if (filmada->innerlevels.back()->innerpieces.size() > 1) 464 | { 465 | startkeys.clear(); 466 | startkeys.emplace_back(a.first); 467 | startkeys.emplace_back(p.first); 468 | typename filmtype:: Innerpiece innerpiece;// 创建parent piece 469 | insertPWLF *inneropt = new insertPWLF(error); 470 | // std::pair< std::vector, std::vector*> > *innerlevel = 471 | // new std::pair< std::vector, std::vector*> >; 472 | typename filmtype::Innerlevel *innerlevel = new typename filmtype::Innerlevel; 473 | filmada->innerlevels.emplace_back(innerlevel); 474 | filmada->innerlevels.back()->opt = inneropt ; 475 | auto rr = append_segmentation(error,startkeys,filmada,k+1); 476 | // cout<< "Jesus, i need You !" << endl; 477 | startkeys.emplace_back(a.first); 478 | return std::pair >(++c,startkeys) ; 479 | } 480 | else if (filmada->innerlevels.size() > 1 && (k != filmada->innerlevels.size()-1) ) // 如上为 由于创建了new innner piece,导致了new innerlevel,如下为,虽然创建了new innerpiece,但只需要更新上层的innner level 481 | { 482 | // 更新上层的最后一个inner piece 483 | startkeys.pop_back(); 484 | auto rr = append_segmentation(error,startkeys,filmada,k+1); 485 | startkeys.clear(); 486 | // cout << "thank You, my Lord! i need You!"< >(++c,startkeys) ; 489 | } 490 | else if (filmada->innerlevels.size() > 1 && (k == filmada->innerlevels.size()-1) ) // 如上为 由于创建了new innner piece,导致了new innerlevel,如下为,虽然创建了new innerpiece,但只需要更新上层的innner level 491 | { 492 | cout << "thank You, my Lord! 
i need You!"< >(++c,startkeys) ; 495 | } 496 | else{ 497 | startkeys.clear(); 498 | } 499 | a = filmada->innerlevels[k]->opt->get_segment(); 500 | if (filmada->innerlevels[k]->pos > 2) 501 | filmada->innerlevels[k]->innerpieces.pop_back(); 502 | filmada->innerlevels[k]->innerpieces.emplace_back(a); 503 | 504 | startkeys.emplace_back(a.first); 505 | return std::pair >(++c,startkeys) ; 506 | } 507 | 508 | } 509 | 510 | auto a = filmada->innerlevels[k]->opt->get_segment(); 511 | if (filmada->innerlevels[k]->pos > 2) 512 | filmada->innerlevels[k]->innerpieces.pop_back(); 513 | filmada->innerlevels[k]->innerpieces.emplace_back(a); 514 | 515 | startkeys.emplace_back(a.first); 516 | return std::pair >(++c,startkeys) ; 517 | 518 | } 519 | 520 | 521 | 522 | template 523 | std::pair > append_segmentation(size_t error,std::vector keys,key_type* payload, 524 | filmtype *filmada, unsigned int &inkeynum,leaf_type* m_tailleaf,filmadalrutype *interchain){ 525 | 526 | 527 | size_t c = 0; 528 | size_t start = 0; 529 | std::vector startkeys; 530 | unsigned int pos = 0; 531 | pair p(keys[0],pos++) ; 532 | int valuesize = filmada->valuesize; 533 | inkeynum +=1; 534 | 535 | 536 | if (m_tailleaf == NULL) // 初始化 537 | { 538 | leaf_type *cur_leaf = new leaf_type; 539 | cur_leaf->slotkey.reserve(8192*4); 540 | cur_leaf->slotdata.reserve(8192*4); 541 | cur_leaf->slotflag.reserve(8192*4); 542 | filmada->m_tailleaf = cur_leaf; 543 | filmada ->leaflevel.leafpieces.emplace_back( filmada->m_tailleaf); 544 | // m_tailleaf = cur_leaf; 545 | insertPWLF *leafopt = new insertPWLF (error); 546 | filmada->leaflevel.opt = leafopt ; 547 | 548 | } 549 | // auto xx = filmada->leaflevel.second[0]->buffqueue.size(); 550 | filmada->leaflevel.opt->append_point(p.first, p.second); 551 | // key_type* value = new key_type[valuesize]; 552 | filmada->m_tailleaf->slotdata.emplace_back( filmada->m_tailleaf->intrachain.put(p.second,payload)); 553 | filmada->m_tailleaf->slotkey.emplace_back(p.first); 554 | 
filmada->m_tailleaf->slotflag.emplace_back(true); 555 | for (inkeynum; inkeynum < keys.size(); ++inkeynum) { 556 | pair next_p(keys[inkeynum],pos++) ; 557 | if (inkeynum != start && next_p.first == p.first) 558 | continue; 559 | p = next_p; 560 | if (filmada ->leaflevel.opt->append_point(p.first, p.second)) { 561 | filmada->m_tailleaf->slotkey.emplace_back(p.first); 562 | // key_type* value = new key_type[valuesize]; 563 | filmada->m_tailleaf->slotdata.emplace_back( filmada->m_tailleaf->intrachain.put(p.second,payload)); 564 | filmada->m_tailleaf->slotflag.emplace_back( true); 565 | } 566 | else 567 | { 568 | start = inkeynum; 569 | auto a = filmada ->leaflevel.opt->get_segment(keys[--inkeynum]); // 将生成的new leaf piece插入到leaflevel 中 570 | filmada->m_tailleaf->update(a); 571 | // filmada ->leaflevel.leafpieces.emplace_back( filmada->m_tailleaf); 572 | interchain->put(filmada->m_tailleaf->startkey,filmada->m_tailleaf); 573 | // 这里是初始化 parent piece 的first key 和 last key 574 | if (filmada->innerlevels.size() == 0){ 575 | startkeys.emplace_back( filmada->m_tailleaf->startkey); 576 | startkeys.emplace_back( p.first); 577 | typename filmtype:: Innerpiece innerpiece;// 创建parent piece 578 | insertPWLF *inneropt = new insertPWLF(error); 579 | typename filmtype::Innerlevel *innerlevel = new typename filmtype::Innerlevel; 580 | filmada->innerlevels.emplace_back(innerlevel); 581 | filmada->innerlevels[0]->opt = inneropt ; 582 | cout << " my Lord, Jesus, please have pity on me"<< endl; 583 | } 584 | else{ 585 | // 从innerlevel 的最底层到root 层,判断是否需要更新 586 | startkeys.emplace_back( p.first); 587 | // cout << "my lovely Lord, i trust in You!" 
<< endl; 588 | } 589 | auto rr = append_segmentation(error,startkeys,filmada,0); 590 | // startkeys.clear(); 591 | vector().swap(startkeys); 592 | // cout << "Jesus, i need You!!"<< endl; 593 | pos = 0; 594 | 595 | filmada->m_tailleaf = new leaf_type; 596 | filmada->m_tailleaf->slotkey.reserve(8192*4); 597 | filmada->m_tailleaf->slotdata.reserve(8192*4); 598 | filmada->m_tailleaf->slotflag.reserve(8192*4); 599 | filmada ->leaflevel.leafpieces.emplace_back( filmada->m_tailleaf); 600 | ++c; 601 | } 602 | } 603 | 604 | auto a = filmada ->leaflevel.opt->get_segment( keys.back()); 605 | filmada->m_tailleaf->update(a); 606 | interchain->put(filmada->m_tailleaf->startkey,filmada->m_tailleaf); 607 | return std::pair> (++c,startkeys); 608 | 609 | } 610 | 611 | 612 | 613 | } 614 | 615 | 616 | 617 | 618 | #endif //FILMINSERT_PWLF_H 619 | -------------------------------------------------------------------------------- /filmadastorage.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | // 4 | // Created by chaochao on 2021/12/23. 
5 | // 6 | 7 | #ifndef EXPERIMENTCC12_FILMADASTORAGE_H 8 | #define EXPERIMENTCC12_FILMADASTORAGE_H 9 | 10 | #include 11 | #include 12 | #include 13 | //#define _GNU_SOURCE 14 | #include 15 | #include "filmadalru.h" 16 | #include "film.h" 17 | 18 | using namespace std; 19 | typedef unsigned short lruOff_type; 20 | typedef unsigned short int pageOff_type; 21 | typedef unsigned int pageid_type; 22 | namespace filmstorage { 23 | typedef std::map infomap; 24 | template 25 | class filmmemory{ 26 | public: 27 | int totalnum; 28 | pageid_type inpageid = 0; 29 | double threshold; 30 | double reserveMem; 31 | index_type *index; // 32 | lru_type *lru; 33 | vector> evicttable; // pageid , offset in page 34 | vector evictkey; // the evicted key, used in debug 35 | vector*> evictPoss; 36 | 37 | 38 | filmmemory( unsigned long int numkey,double Threshold, index_type *filmtree , lru_type *LRU){ 39 | totalnum = 0; 40 | threshold = Threshold; 41 | evicttable.reserve(numkey); 42 | index = filmtree; 43 | lru = LRU; 44 | 45 | } 46 | 47 | 48 | void insert(const std::vector keys,key_type* payload,const int error,const int error_recursize ){ 49 | index->build(keys,payload,error,error_recursize,lru); 50 | } 51 | 52 | void append(const std::vector keys,key_type* payload,const int error,const int error_recursize ){ 53 | index->update_append(keys,payload,error,error_recursize,lru); 54 | } 55 | 56 | // void update(const std::vector keys,const std::vector payload){ 57 | // index->update_random(keys,payload); 58 | // } 59 | 60 | void update(const std::vector keys,key_type* payload){ 61 | index->update_random(keys,payload,lru); // out-of-order insertion 62 | } 63 | 64 | 65 | struct memoryusage 66 | { 67 | /// the total used memory 68 | double totalusemem; 69 | /// the usage of each component 70 | infomap meminfo; 71 | /// Constructor 72 | inline memoryusage(double total,infomap eachmem) 73 | : totalusemem(total), meminfo(eachmem) 74 | { } 75 | inline memoryusage() 76 | : totalusemem(), meminfo() 
77 | { } 78 | 79 | }; 80 | 81 | struct inmempage // a buffer page in memory 82 | { 83 | pageid_type pageid; 84 | //vector inmemdata; 85 | key_type* inmemdata = NULL; 86 | int pagesize; 87 | int freespace; 88 | int recordnum; 89 | inmempage(pageid_type idinpage,int sizepage,int numrecord){ 90 | pageid = idinpage; 91 | pagesize = sizepage; 92 | inmemdata = new key_type[pagesize]; 93 | //freespace = pagesize; 94 | freespace = 0; 95 | recordnum = numrecord; 96 | } 97 | inmempage(){ 98 | //freespace = pagesize; 99 | } 100 | ~inmempage(){ 101 | delete [] inmemdata; 102 | } 103 | 104 | 105 | }; 106 | 107 | // define the current page in memory, when the page is full, write to disk 108 | inmempage *inpage = NULL; 109 | vector inmempages ; 110 | 111 | forceinline void createinmempage(int sizepage,int numrecord){ 112 | 113 | inpage = new inmempage(inpageid,sizepage,numrecord); //create a page in memory; 114 | inpageid += 1; 115 | 116 | } 117 | 118 | // compute the usage of memory, that the total used memory by index,lru,data.addusage; 119 | 120 | memoryusage simu_computeMemUsage(int transnum){ 121 | 122 | infomap indexdatausage = index->show_verify(); 123 | double dataV = ( index->valuesize+1)*8; 124 | double datausage = double((index->inkeynum-transnum)*(dataV))/1024/1024; 125 | double addusage = double((index->exkeynum+index->inkeynum)*(1+8)+(index->exkeynum+transnum)*(8+4)+(index->exkeynum+transnum)*sizeof(key_type) )/1024/1024; 126 | double indexusage = indexdatausage["indexusage"]; 127 | double hashlrusize = sizeof(key_type) + 8+ 16 + 16; 128 | double no_hashsize = 16+4; //prev,next, slot-short int 129 | double lruusage = double(no_hashsize*(index->inkeynum-transnum) + hashlrusize * index->vstats.leaves)/1024/1024; 130 | double totalmem = lruusage + datausage + indexusage + addusage; 131 | double leaves = indexdatausage["leaves"]; 132 | double levels = indexdatausage["levels"]; 133 | double innernodes = indexdatausage["inners"]; 134 | double exkeynum = index->exkeynum; 
135 | double inkeynum = index->inkeynum; 136 | 137 | infomap devied_mem; 138 | devied_mem.insert(pair("datausage",datausage)); 139 | devied_mem.insert(pair("indexusage",indexusage)); 140 | devied_mem.insert(pair("lruusage",lruusage)); 141 | devied_mem.insert(pair("addusage",addusage)); //key,flag,pageID,offset 142 | devied_mem.insert(pair("leaves",leaves)); 143 | devied_mem.insert(pair("levels",levels)); 144 | devied_mem.insert(pair("innernodes",innernodes)); 145 | devied_mem.insert(pair("totalusage",totalmem)); //key,flag,pageID,offset 146 | devied_mem.insert(pair("exkeynum",exkeynum)); 147 | devied_mem.insert(pair("inkeynum",inkeynum)); 148 | memoryusage res_memusage(totalmem,devied_mem); 149 | return res_memusage; 150 | 151 | } 152 | 153 | memoryusage runtimecomputeMemUsage(){ 154 | 155 | infomap indexdatausage = index->runtimeshow_verify(); 156 | double lruusage = indexdatausage["lruusage"] ; 157 | double indexusage = indexdatausage["indexusage"]; 158 | double datausage = indexdatausage["datausage"]; 159 | double addusage = indexdatausage["addusage"]; 160 | double totalmem = lruusage + datausage + indexusage + addusage; 161 | double leaves = indexdatausage["leaves"]; 162 | double levels = indexdatausage["levels"]; 163 | double innernodes = indexdatausage["inners"]; 164 | double exkeynum = index->exkeynum; 165 | double inkeynum = index->inkeynum; 166 | 167 | infomap devied_mem; 168 | devied_mem.insert(pair("datausage",datausage)); 169 | devied_mem.insert(pair("indexusage",indexusage)); 170 | devied_mem.insert(pair("lruusage",lruusage)); 171 | devied_mem.insert(pair("addusage",addusage)); //key,flag,pageID,offset 172 | devied_mem.insert(pair("leaves",leaves)); 173 | devied_mem.insert(pair("levels",levels)); 174 | devied_mem.insert(pair("innernodes",innernodes)); 175 | devied_mem.insert(pair("totalusage",totalmem)); //key,flag,pageID,offset 176 | devied_mem.insert(pair("exkeynum",exkeynum)); 177 | devied_mem.insert(pair("inkeynum",inkeynum)); 178 | memoryusage 
res_memusage(totalmem,devied_mem); 179 | return res_memusage; 180 | 181 | } 182 | 183 | 184 | memoryusage computeMemUsage(){ 185 | 186 | infomap indexdatausage = index->show_verify(); 187 | double lruusage = indexdatausage["lruusage"] ; 188 | double indexusage = indexdatausage["indexusage"]; 189 | double datausage = indexdatausage["datausage"]; 190 | double addusage = indexdatausage["addusage"]; 191 | double totalmem = lruusage + datausage + indexusage + addusage; 192 | double leaves = indexdatausage["leaves"]; 193 | double levels = indexdatausage["levels"]; 194 | double innernodes = indexdatausage["inners"]; 195 | double exkeynum = index->exkeynum; 196 | double inkeynum = index->inkeynum; 197 | 198 | infomap devied_mem; 199 | devied_mem.insert(pair("datausage",datausage)); 200 | devied_mem.insert(pair("indexusage",indexusage)); 201 | devied_mem.insert(pair("lruusage",lruusage)); 202 | devied_mem.insert(pair("addusage",addusage)); //key,flag,pageID,offset 203 | devied_mem.insert(pair("leaves",leaves)); 204 | devied_mem.insert(pair("levels",levels)); 205 | devied_mem.insert(pair("innernodes",innernodes)); 206 | devied_mem.insert(pair("totalusage",totalmem)); //key,flag,pageID,offset 207 | devied_mem.insert(pair("exkeynum",exkeynum)); 208 | devied_mem.insert(pair("inkeynum",inkeynum)); 209 | memoryusage res_memusage(totalmem,devied_mem); 210 | return res_memusage; 211 | 212 | } 213 | 214 | pair* writeevicttable(pair pospage,pair* evictpos){ 215 | pair* oldpospage = evictpos; 216 | evictpos->first = pospage.first; 217 | evictpos->second = pospage.second; 218 | return oldpospage; 219 | 220 | // evictpos->first = pospage.first; 221 | // evictpos->second = pospage.second; 222 | } 223 | 224 | pair* writeevicttable(pair pospage){ 225 | unsigned long int evictpos = evicttable.size(); 226 | // evictkey.push_back(key); 227 | evicttable.push_back(pospage); 228 | auto eptr = &evicttable[evictpos]; 229 | return eptr; 230 | } 231 | 232 | 233 | unsigned short int 
runtimeevictpagestodisk(disk_type *diskpage){ 234 | unsigned short int pnum = inmempages.size(); 235 | unsigned short int num = inmempages.size(); // 此次要写入磁盘的页的数目 236 | int fd = open(diskpage->pagefile, O_RDWR | O_DIRECT, 0755); 237 | unsigned long int fixed_buf_size = diskpage->pagesize * sizeof(key_type); // 磁盘页固定的大小 238 | unsigned long seekdis = fixed_buf_size * diskpage->nextpageid; 239 | lseek(fd, seekdis, SEEK_SET); 240 | int i = 0; 241 | while (pnum > diskpage->blocknum) { 242 | pnum -= diskpage->blocknum; //diskpage->blocknum 表示 写出block 包含多少个page 243 | key_type *buf = new key_type[diskpage->pagesize* diskpage->blocknum]; 244 | 245 | unsigned long buf_size = fixed_buf_size * diskpage->blocknum; 246 | int ret = posix_memalign((void **) &buf, 512, buf_size); 247 | 248 | int offset = 0; 249 | for(int k = 0;kblocknum;k++){ 250 | memcpy(buf + offset * diskpage->pagesize, &inmempages[i*diskpage->blocknum+k]->inmemdata[0], fixed_buf_size); 251 | ++offset; 252 | } 253 | i += 1; 254 | ret = write(fd, buf, buf_size); 255 | // free(buf); 256 | delete [] buf; 257 | } 258 | 259 | key_type *buf = new key_type[diskpage->pagesize*pnum]; 260 | unsigned long int buf_size = diskpage->pagesize * sizeof(key_type) * pnum; 261 | int ret = posix_memalign((void **) &buf, 512, buf_size); 262 | unsigned int offset = 0; 263 | for(int k = 0;kpagesize, &inmempages[i*diskpage->blocknum+k]->inmemdata[0], fixed_buf_size); 265 | ++offset; 266 | } 267 | ret = write(fd, buf, buf_size); 268 | // free(buf); 269 | delete[] buf; 270 | diskpage->nextpageid += num; 271 | close(fd); 272 | 273 | for (int mi = 0;mi < inmempages.size();mi++){ 274 | delete inmempages[mi]; 275 | inmempages[mi] = NULL; 276 | } 277 | inmempages.resize(0); 278 | return num; 279 | } 280 | 281 | pair* evictkeytoinpage(key_type ekey,data_type edata, disk_type *diskpage){ 282 | 283 | if (!(inpage->recordnum --)) //如果还能容纳一个record,那么就向该页写出,如果不能,那么就将当前的inpage写出disk,创建新的inpage 284 | { 285 | inmempages.emplace_back(inpage); 286 | // 
diskpage->odirectenterpage(inpage->inmemdata);// write to disk 287 | createinmempage(diskpage->pagesize,diskpage->recordnum); 288 | inpage->recordnum -=1; 289 | 290 | } 291 | 292 | auto evictpos = writeevicttable(pair(inpage->pageid, inpage->freespace)); 293 | inpage->inmemdata[inpage->freespace++] = ekey; 294 | for (int vi = 0;vi < index->valuesize;vi++){ 295 | inpage->inmemdata[inpage->freespace++] = edata[vi]; 296 | } 297 | 298 | return evictpos; 299 | 300 | } 301 | 302 | 303 | void evictkeytoinpage(key_type ekey,data_type edata, disk_type *diskpage,pair* evictpos){ 304 | 305 | if (!(inpage->recordnum --)) //如果还能容纳一个record,那么就向该页写出,如果不能,那么就将当前的inpage写出disk,创建新的inpage 306 | { 307 | inmempages.emplace_back(inpage); 308 | // diskpage->odirectenterpage(inpage->inmemdata);// write to disk 309 | createinmempage(diskpage->pagesize,diskpage->recordnum); 310 | inpage->recordnum -=1; 311 | 312 | } 313 | writeevicttable(pair(inpage->pageid, inpage->freespace),evictpos); 314 | inpage->inmemdata[inpage->freespace++] = ekey; 315 | for (int vi = 0;vi < index->valuesize;vi++){ 316 | inpage->inmemdata[inpage->freespace++] = edata[vi]; 317 | } 318 | 319 | } 320 | 321 | 322 | /* 323 | pair* evictkeytoinpage(key_type ekey,data_type edata, disk_type *diskpage,pair* evictpos){ 324 | if (!(inpage->recordnum --)) //如果还能容纳一个record,那么就执行transfer,如果不能,那么就将当前的inpage写出disk,创建新的inpage 325 | { 326 | diskpage->odirectenterpage(inpage->inmemdata);// write to disk 327 | createinmempage(diskpage->pagesize,diskpage->recordnum); 328 | inpage->recordnum -=1; 329 | } 330 | short int offset = inpage->inmemdata.size(); 331 | inpage->inmemdata.push_back(ekey); 332 | inpage->freespace -= 1; 333 | inpage->inmemdata.insert( inpage->inmemdata.begin()+(inpage->pagesize - inpage->freespace),edata.begin(),edata.end()); 334 | inpage->freespace -= (index->valuesize); 335 | // short int offset2 = (inpage->pagesize - index->valuesize-1 - inpage->freespace); 336 | pair* olppagepos = 
writeevicttable(pair(inpage->pageid, offset),evictpos); 337 | return olppagepos; 338 | } 339 | */ 340 | /* 341 | pair evictkeytoinpage(key_type ekey,data_type edata, disk_type *diskpage,int evictpos){ 342 | if (!(inpage->recordnum --)) //如果还能容纳一个record,那么就执行transfer,如果不能,那么就将当前的inpage写出disk,创建新的inpage 343 | { 344 | diskpage->odirectenterpage(inpage->inmemdata);// write to disk 345 | createinmempage(diskpage->pagesize,index->recordsize+1); 346 | inpage->recordnum -=1; 347 | } 348 | inpage->inmemdata.push_back(ekey); 349 | inpage->freespace -= 1; 350 | inpage->inmemdata.insert( inpage->inmemdata.begin()+(inpage->pagesize - inpage->freespace),edata.second.begin(),edata.second.end()); 351 | inpage->freespace -= index->recordsize; 352 | 353 | pair olppagepos = writeevicttable(pair(inpage->pageid, (inpage->pagesize - index->recordsize -1 - inpage->freespace)),evictpos); 354 | return olppagepos; 355 | } 356 | */ 357 | 358 | // pair* evictkeytoinpage(key_type ekey,data_type edata, disk_type *diskpage){ 359 | // 360 | // if (!(inpage->recordnum --)) //如果还能容纳一个record,那么就执行transfer,如果不能,那么就将当前的inpage写出disk,创建新的inpage 361 | // { 362 | // diskpage->odirectenterpage(inpage->inmemdata);// write to disk 363 | // createinmempage(diskpage->pagesize,index->recordsize); 364 | // inpage->recordnum -=1; 365 | // } 366 | // inpage->inmemdata.push_back(ekey); 367 | // inpage->freespace -= 1; 368 | // inpage->inmemdata.insert( inpage->inmemdata.begin()+(inpage->pagesize - inpage->freespace),edata.begin(),edata.end()); 369 | // inpage->freespace -= (index->recordsize-1); 370 | // 371 | // auto posevict = writeevicttable(pair(inpage->pageid, (inpage->pagesize - index->recordsize - inpage->freespace))); 372 | // return posevict; 373 | // } 374 | 375 | 376 | 377 | unsigned long filmtransfer(unsigned int transleaves,disk_type *diskpage){ //perform the transfer procedure, 378 | // 379 | timeval initw1,initw2; 380 | unsigned long initwtime = 0; 381 | if (inpage == NULL){ 382 | 
createinmempage(diskpage->pagesize,diskpage->recordnum); //create a page in memory; 383 | } 384 | if (index->m_transleaf == NULL){ 385 | index->m_transleaf = index->leaflevel.leafpieces[0]; 386 | } 387 | /* 388 | if ( this->index->Error > 256 && transleaves == 1 ) 389 | {// 如果是这种情况,需要在一个leaf 中 批量写出,因为一次性写出leaf 所有的数据 会使得 内存的usage not full 390 | // 判断该页全部数据写出去 的usage 391 | auto midtransflag = simu_computeMemUsage(index->m_transleaf->slotkey.size()); 392 | if (midtransflag.totalusemem > threshold ){ 393 | for (int k = 0; k < index->m_transleaf->slotkey.size();k++) 394 | { 395 | if (!(inpage->recordnum --)) //如果还能容纳一个record,那么就执行transfer,如果不能,那么就将当前的inpage写出disk,创建新的inpage 396 | { 397 | gettimeofday(&initw1, NULL); 398 | diskpage->odirectenterpage(inpage->inmemdata);// write to disk , 这里是初始化阶段,所以没有采用 odirect 的方式 ,如果采用,只需要将在enterpage 前加上odirect, 去掉则不是直接操作磁盘 399 | delete inpage; 400 | inpage = NULL; 401 | createinmempage(diskpage->pagesize,diskpage->recordnum); 402 | gettimeofday(&initw2, NULL); 403 | initwtime += (initw2.tv_sec - initw1.tv_sec) + (double) (initw2.tv_usec - initw1.tv_usec) / 1000000.0; 404 | inpage->recordnum -=1; 405 | } 406 | 407 | auto eptr = writeevicttable(pair(inpage->pageid, inpage->freespace)); 408 | inpage->inmemdata[inpage->freespace++] = index->m_transleaf->slotkey[k]; 409 | auto transdata = (adalru::Node*) index->m_transleaf->slotdata[k]; 410 | for (int vi = 0; vi < index->valuesize; vi++){ 411 | inpage->inmemdata[inpage->freespace++] = transdata->value[vi]; 412 | } 413 | 414 | index->m_transleaf->intrachain.remove_node(transdata);//pop from intrachain 415 | transdata = NULL; 416 | index->m_transleaf->slotflag[k] = false; 417 | index->m_transleaf->slotdata[k] = eptr ; // 418 | } 419 | lru->remove(index->m_transleaf->startkey);// pop from interchain 420 | index->inkeynum -= index->m_transleaf->slotkey.size(); 421 | index->exkeynum += index->m_transleaf->slotkey.size(); 422 | index->m_transleaf = 
index->leaflevel.leafpieces[index->leafsplit+1];//evict data from the first leaf 423 | index->leafsplit += transleaves; 424 | } 425 | else{ // 一点一点的写出 426 | auto midtrans = computeMemUsage(); 427 | int split = 0; 428 | while (midtrans.totalusemem > threshold){ 429 | double ratio = (midtrans.totalusemem-threshold)/(midtrans.totalusemem-midtransflag.totalusemem ) ; 430 | int trannum = ceil(index->m_transleaf->slotkey.size() * ratio+100); 431 | for (int k = 0; k < trannum;k++) 432 | { 433 | if (!(inpage->recordnum --)) //如果还能容纳一个record,那么就执行transfer,如果不能,那么就将当前的inpage写出disk,创建新的inpage 434 | { 435 | gettimeofday(&initw1, NULL); 436 | diskpage->odirectenterpage(inpage->inmemdata);// write to disk , 这里是初始化阶段,所以没有采用 odirect 的方式 ,如果采用,只需要将在enterpage 前加上odirect, 去掉则不是直接操作磁盘 437 | delete inpage; 438 | inpage = NULL; 439 | createinmempage(diskpage->pagesize,diskpage->recordnum); 440 | gettimeofday(&initw2, NULL); 441 | initwtime += (initw2.tv_sec - initw1.tv_sec) + (double) (initw2.tv_usec - initw1.tv_usec) / 1000000.0; 442 | inpage->recordnum -=1; 443 | } 444 | 445 | auto transdata = (adalru::Node*) index->m_transleaf->slotdata[split+k].second; 446 | auto eptr = writeevicttable(pair(inpage->pageid, inpage->freespace)); 447 | inpage->inmemdata[inpage->freespace++] = index->m_transleaf->slotkey[split+k]; 448 | 449 | for (int vi = 0; vi < index->valuesize; vi++){ 450 | inpage->inmemdata[inpage->freespace++] = transdata->value[vi]; 451 | } 452 | index->m_transleaf->intrachain.remove_node(transdata);//pop from intrachain 453 | transdata = NULL; 454 | index->m_transleaf->slotdata[split+k].first = false; 455 | index->m_transleaf->slotdata[split+k].second = eptr ; // 456 | } 457 | split += trannum; 458 | index->inkeynum -= trannum; 459 | index->exkeynum += trannum; 460 | midtrans = computeMemUsage(); 461 | } 462 | } 463 | } 464 | else{ 465 | */ 466 | for (int i = 0; i< transleaves; i++){ 467 | for (unsigned int k = 0; k < index->m_transleaf->slotkey.size();k++) 468 | { 469 | if 
(!(inpage->recordnum --)) //如果还能容纳一个record,那么就执行transfer,如果不能,那么就将当前的inpage写出disk,创建新的inpage 470 | { 471 | 472 | gettimeofday(&initw1, NULL); 473 | diskpage->odirectenterpage(inpage->inmemdata);// write to disk , 这里是初始化阶段,所以没有采用 odirect 的方式 ,如果采用,只需要将在enterpage 前加上odirect, 去掉则不是直接操作磁盘 474 | gettimeofday(&initw2, NULL); 475 | initwtime += (initw2.tv_sec - initw1.tv_sec) * 1000000 + (initw2.tv_usec - initw1.tv_usec); 476 | delete inpage; 477 | inpage = NULL; 478 | createinmempage(diskpage->pagesize,diskpage->recordnum); 479 | inpage->recordnum -=1; 480 | } 481 | 482 | auto eptr = writeevicttable(pair(inpage->pageid, inpage->freespace)); 483 | inpage->inmemdata[inpage->freespace++] = index->m_transleaf->slotkey[k]; 484 | auto transdata = (adalru::Node*) index->m_transleaf->slotdata[k]; 485 | for (int vi = 0; vi < index->valuesize; vi++){ 486 | inpage->inmemdata[inpage->freespace++] = transdata->value[vi]; 487 | } 488 | 489 | index->m_transleaf->intrachain.remove_node(transdata);//pop from intrachain 490 | // delete transdata; 491 | transdata = NULL; 492 | index->m_transleaf->slotflag[k] = false; 493 | index->m_transleaf->slotdata[k] = eptr ; // 494 | } 495 | lru->remove(index->m_transleaf->startkey);// pop from interchain 496 | index->inkeynum -= index->m_transleaf->slotkey.size(); 497 | index->exkeynum += index->m_transleaf->slotkey.size(); 498 | index->m_transleaf = index->leaflevel.leafpieces[index->leafsplit+i+1];//evict data from the first leaf 499 | } 500 | index->leafsplit += transleaves; 501 | 502 | diskpage->initwtime += initwtime; 503 | return initwtime; 504 | } 505 | 506 | unsigned long filmtransfer(double totalusemem,disk_type *diskpage){ //perform the transfer procedure, 507 | // 508 | timeval initw1,initw2; 509 | unsigned long initwtime = 0; 510 | 511 | // 大致计算出此次要evict 的数量 512 | double ratio = (0.3) * (totalusemem - threshold) / threshold; 513 | unsigned int estimate_evict = ceil(index->inkeynum * ratio) + 300; 514 | unsigned int batch_evict = (1280 * 
diskpage->pagesize / (index->valuesize + 1)); 515 | if (estimate_evict < batch_evict) 516 | batch_evict = estimate_evict; 517 | gettimeofday(&initw1, NULL); 518 | for (unsigned int i = 0; i < batch_evict; i++) // the number of records to be evicted in batches 519 | { 520 | // evict data, get the tail node, remove the tail of intrachain 521 | auto evictleaf = lru->get_tail(); 522 | auto evictslotV = evictleaf->intrachain.poptail(); // the tail of the accessed leaf 523 | auto writeevict = evictkeytoinpage(evictleaf->slotkey[evictslotV->key], evictslotV->value, 524 | diskpage); 525 | evictleaf->slotflag[evictslotV->key] = false; 526 | evictleaf->slotdata[evictslotV->key] = writeevict; 527 | delete evictslotV; 528 | evictslotV = NULL; 529 | 530 | } 531 | 532 | runtimeevictpagestodisk(diskpage); 533 | gettimeofday(&initw2, NULL); 534 | initwtime += (initw2.tv_sec - initw1.tv_sec)* 1000000 + (initw2.tv_usec - initw1.tv_usec); 535 | 536 | index->inkeynum -= batch_evict; 537 | index->exkeynum += batch_evict; 538 | diskpage->initwtime += initwtime; 539 | return initwtime; 540 | } 541 | 542 | 543 | 544 | template 545 | pair runtimejudgetrans(stat_type query_stats){ 546 | struct timeval ct1, ct2; 547 | double ctimeuse; 548 | gettimeofday(&ct1, NULL); 549 | memoryusage res_memusage = runtimecomputeMemUsage(); 550 | gettimeofday(&ct2, NULL); 551 | ctimeuse = (ct2.tv_sec - ct1.tv_sec) + (double) (ct2.tv_usec - ct1.tv_usec) / 1000000.0; 552 | query_stats->computetimeuse += ctimeuse; 553 | if (res_memusage.totalusemem > threshold) //perform transfer process 554 | // 判断如果全部数据写出去都不能满足larger-than-memory data set 555 | return pair (true,res_memusage.totalusemem); 556 | else 557 | return pair (false,res_memusage.totalusemem); 558 | } 559 | 560 | template 561 | pair judgetransfer(stat_type range_stats ){ 562 | memoryusage res_memusage; 563 | struct timeval ct1, ct2; 564 | double ctimeuse; 565 | gettimeofday(&ct1, NULL); 566 | res_memusage = computeMemUsage(); 567 | gettimeofday(&ct2, 
NULL); 568 | ctimeuse = (ct2.tv_sec - ct1.tv_sec) + (double) (ct2.tv_usec - ct1.tv_usec) / 1000000.0; 569 | range_stats->computetimeuse += ctimeuse; 570 | map::iterator iter; 571 | 572 | for(iter = res_memusage.meminfo.begin(); iter != res_memusage.meminfo.end(); iter++) 573 | cout<first<<" "<second<<" *** "; 574 | cout< threshold){//perform transfer process 576 | return pair(true,res_memusage); 577 | } 578 | return pair(false,res_memusage); 579 | } 580 | 581 | 582 | pair judgetransfer( ){ 583 | memoryusage res_memusage; 584 | res_memusage = computeMemUsage(); 585 | 586 | map::iterator iter; 587 | 588 | for(iter = res_memusage.meminfo.begin(); iter != res_memusage.meminfo.end(); iter++) 589 | cout<first<<" "<second<<" *** "; 590 | cout< threshold){//perform transfer process 592 | return pair(true,res_memusage); 593 | } 594 | return pair(false,res_memusage); 595 | } 596 | }; 597 | 598 | typedef std::map infomap; 599 | template 600 | class filmdisk{ 601 | public: 602 | const char *pagefile; 603 | int pagesize; 604 | pageid_type nextpageid; 605 | int recordnum; // 一个页最多容纳的 record 的数量 606 | int recordsize; 607 | int blocknum; 608 | unsigned long initwtime; 609 | filmdisk( const char* diskfile,int sizepage,int numrecord,int sizerecord) { 610 | pagefile = diskfile; 611 | pagesize = sizepage; 612 | recordnum = numrecord; 613 | blocknum = 1024*1024/(sizepage*8); 614 | nextpageid = 0; 615 | initwtime = 0.0; 616 | recordsize = sizerecord; 617 | } 618 | 619 | 620 | vector readfromdisk(pair diskpos ,int sizerecord){ // use the buffer of memory 621 | FILE *fdisk;// 读取磁盘文件 622 | fdisk = fopen(pagefile,"rb+"); 623 | fseek(fdisk,static_cast(diskpos.first*pagesize*8),SEEK_SET); 624 | vector pagedata; 625 | key_type tmp; 626 | 627 | for (int i = 0; i < pagesize; i ++){ 628 | fread(&tmp, sizeof(long int), 1, fdisk); // 从文件中读数 629 | pagedata.push_back(tmp); 630 | // cout<< tmp << " "; 631 | } 632 | 633 | vector res; 634 | 635 | for (int i = 0; i < sizerecord; i ++){ 636 | 
res.push_back(pagedata[diskpos.second+i]); 637 | } 638 | 639 | // cout<<"Jesus, You are my refuge! "< odirectreadfromdisk(pair diskpos ,int sizerecord){ 647 | int fd; 648 | key_type *buf; 649 | vector res; 650 | unsigned long int buf_size = pagesize*8; 651 | int ret = posix_memalign((void **)&buf, 512, buf_size); 652 | memset(buf, 'c', buf_size); 653 | 654 | fd = open(pagefile, O_RDWR | O_DIRECT , 0755); 655 | /* 656 | if (fd < 0){ 657 | perror("open ./direct_io.data failed"); 658 | exit(1); 659 | } 660 | */ 661 | unsigned long seekdis = buf_size * diskpos.first; 662 | ret = pread(fd, buf, buf_size,seekdis); 663 | if (ret <= 0){ 664 | cout << "Jesus, i need You!" << endl; 665 | } 666 | for (int i = 0; i < sizerecord; i ++){ 667 | res.push_back(buf[diskpos.second+i]); 668 | } 669 | // cout<<"Jesus, You are my refuge! "< odirectreadfromdisk(pair* diskpos){ 678 | int fd; 679 | key_type *buf; 680 | pair res; 681 | uint64_t buf_size = pagesize*8; 682 | int ret = posix_memalign((void **)&buf, 512, buf_size); 683 | memset(buf, 'c', buf_size); 684 | 685 | fd = open(pagefile, O_RDWR | O_DIRECT , 0755); 686 | uint64_t aoff = diskpos->first*buf_size; 687 | ret = pread(fd, buf, buf_size,aoff); 688 | if (ret <= 0){ 689 | cout << "Jesus, i need You!" << endl; 690 | } 691 | 692 | res.first = buf[diskpos->second]; 693 | res.second = new key_type[recordsize-1]; 694 | for (int i = 0; i < recordsize-1; i ++){ 695 | res.second[i] = buf[diskpos->second+1+i]; 696 | } 697 | // cout<<"Jesus, You are my refuge! "< datas) //将一个内存页,写入磁盘, doesn't consider the odirect 705 | { 706 | 707 | FILE *fdisk;// 读取磁盘文件 708 | fdisk = fopen(pagefile,"rb+"); 709 | fseek(fdisk,static_cast(nextpageid * (pagesize)*8),SEEK_SET); 710 | nextpageid += 1; 711 | // cout< reads(datas.size()); 724 | //// cout<< reads.size()<