├── AUTHORS ├── LICENCE ├── Makefile ├── README.md ├── VERSION └── src ├── fsais_src ├── em_compute_sa.hpp ├── em_induce_minus_and_plus_suffixes.hpp ├── em_induce_minus_star_substrings.hpp ├── em_induce_plus_star_substrings.hpp ├── em_induce_plus_suffixes.hpp ├── em_radix_heap.hpp ├── im_induce_substrings.hpp ├── im_induce_suffixes.hpp ├── io │ ├── async_backward_bit_stream_reader.hpp │ ├── async_backward_stream_reader.hpp │ ├── async_backward_stream_reader_multipart.hpp │ ├── async_bit_stream_writer.hpp │ ├── async_multi_bit_stream_reader.hpp │ ├── async_multi_stream_reader.hpp │ ├── async_multi_stream_reader_multipart.hpp │ ├── async_multi_stream_writer.hpp │ ├── async_stream_reader.hpp │ ├── async_stream_writer.hpp │ ├── async_stream_writer_multipart.hpp │ └── simple_accessor.hpp ├── naive_compute_sa.hpp ├── packed_pair.hpp ├── radix_heap.hpp ├── utils.cpp └── utils.hpp ├── main.cpp ├── uint24.hpp ├── uint40.hpp └── uint48.hpp /AUTHORS: -------------------------------------------------------------------------------- 1 | Juha Karkkainen 2 | Dominik Kempa 3 | -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | Copyright 2017-2020 Juha Karkkainen, Dominik Kempa 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SHELL = /bin/sh 2 | CC = g++ 3 | CFLAGS = -Wall -Wextra -pedantic -Wshadow -funroll-loops -DNDEBUG -O3 -march=native -std=c++0x -pthread 4 | #CFLAGS = -Wall -Wextra -pedantic -Wshadow -g2 -std=c++0x -pthread 5 | #AUX_DISK_FLAGS = -DMONITOR_DISK_USAGE 6 | 7 | all: construct_sa 8 | 9 | construct_sa: 10 | $(CC) $(CFLAGS) -o construct_sa src/main.cpp src/fsais_src/utils.cpp -fopenmp $(AUX_DISK_FLAGS) 11 | 12 | clean: 13 | /bin/rm -f *.o 14 | 15 | nuclear: 16 | /bin/rm -f construct_sa *.o 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | fSAIS - External memory suffix array construction using inducing 2 | ================================================================ 3 | 4 | 5 | Description 6 | ----------- 7 | 8 | fSAIS is an implementation of the external-memory suffix array 9 | construction algorithm. The algorithm was described in the paper 10 | 11 | @inproceedings{kkpz17alenex, 12 | author = {Juha K{\"{a}}rkk{\"{a}}inen and Dominik Kempa 13 | and Simon J. 
Puglisi and Bella Zhukova}, 14 | title = {Engineering External Memory Induced Suffix Sorting}, 15 | booktitle = {19th Meeting on Algorithm Engineering and Experimentation 16 | (ALENEX 2017)}, 17 | pages = {98--108}, 18 | year = {2017}, 19 | doi = {10.1137/1.9781611974768.8}, 20 | } 21 | 22 | The latest version of fSAIS is available from 23 | https://github.com/dominikkempa/fsais. 24 | 25 | 26 | 27 | Requirements 28 | ------------ 29 | 30 | fSAIS has no external dependencies (libraries, cmake, etc). 31 | It only requires: 32 | - g++ compiler supporting the -std=c++0x flag (all modern versions) 33 | - A 64-bit operating system. The current version has been tested 34 | on Linux/PC. 35 | 36 | 37 | 38 | Compilation and usage 39 | --------------------- 40 | 41 | The package contains a single Makefile in the main directory. Type 42 | `make` to build the executable. For usage instructions, run the 43 | program without any arguments. 44 | 45 | ### Example 46 | 47 | The simplest usage of fSAIS is as follows. Suppose the text is located 48 | in `/data/input.txt`. Then, to compute the suffix array of `input.txt` 49 | type: 50 | 51 | $ ./construct_sa /data/input.txt 52 | 53 | 54 | This will write the output suffix array to `/data/input.txt.sa5`. Each 55 | element of the suffix array is encoded using 40-bit integers, i.e., 56 | the output suffix array will take up 5n bytes of disk space. By 57 | default, the algorithm uses 3.5GiB of RAM for computation and it 58 | assumes that the input text is over byte alphabet (see below for 59 | explanation on how to adjust this). A more advanced usage is 60 | demonstrated below. 61 | 62 | $ ./construct_sa ./input.txt -m 8gi -o ../input.txt.sa 63 | 64 | 65 | Explanation: 66 | - The -m flag allows specifying the amount of RAM used during the 67 | computation (in bytes). In this example, the RAM limit is set to 8gi 68 | = 8 * 2^30 bytes (see below). 69 | - The -o flag allows specifying the location and filename of the 70 | output suffix array. 
The default location and filename is the same 71 | as input text, with the appended ".saX" suffix, where X is the used 72 | integer size (by default: 5 bytes). 73 | 74 | Notes: 75 | - The argument of the -m flag (RAM used during the computation) can be 76 | specified either explicitly or using common suffixes such as K, M, 77 | G, T, Ki, Mi, Gi, Ti, which respectively correspond to multipliers: 78 | 10^3, 10^6, 10^9, 10^12, 2^10, 2^20, 2^30, 2^40. Suffix names are 79 | not case-sensitive, e.g., Ti = ti, k = K. 80 | - The flags specifying RAM usage, output filename, etc. can be given 81 | in any order. 82 | - Filenames passed as command-line arguments can be given as relative 83 | paths, e.g., `../input.txt` and `~/data/input.txt` are valid paths, 84 | see also example above. 85 | - To enable additional statistics about the computation (alternative 86 | counter of I/O volume and tracing of the disk usage), uncomment the line 87 | with AUX_DISK_FLAGS in the Makefile. When this flag is enabled, the 88 | computation could slow down, thus this flag is disabled by default. 89 | - To change the type used to encode characters of the input text or 90 | the integer type used to encode positions in the text, adjust types 91 | "char_type" and "text_offset_type" as well as the value of the 92 | variable "text_alphabet_size" in the source file ./src/main.cpp. 93 | 94 | 95 | 96 | Troubleshooting 97 | --------------- 98 | 99 | 1. I am getting an error about the exceeded number of opened files. 100 | 101 | Solution: The error is caused by the operating system imposing a limit 102 | on the maximum number of files opened by a program. The limit can be 103 | increased with the `ulimit -n newlimit` command. However, in Linux the 104 | limit cannot be increased beyond the so-called "hard limit", which is 105 | usually only a few times larger. Furthermore, this is a temporary 106 | solution that needs to be repeated every time a new session is 107 | started. 
To increase the limits permanently, edit (as a root) the file 108 | `/etc/security/limits.conf` and add the following lines at the end 109 | (including the asterisks): 110 | 111 | 112 | * soft nofile 128000 113 | * hard nofile 128000 114 | 115 | 116 | This increases the limit to 128000 (use larger values if necessary). 117 | The new limits apply (check with `ulimit -n`) after starting new 118 | session. 119 | 120 | 121 | 122 | Limitations 123 | ----------- 124 | 125 | - At present the only limitation in the usage of the algorithm is the 126 | need to ensure that the limit for the number of opened files in the 127 | system is sufficiently large to prevent the above error. This 128 | technical shortcoming will be eliminated in the future versions of 129 | fSAIS. 130 | 131 | 132 | 133 | Terms of use 134 | ------------ 135 | 136 | fSAIS is released under the MIT/X11 license. See the file LICENCE for 137 | more details. If you use this code, please cite the paper mentioned 138 | above. 139 | 140 | 141 | 142 | Authors 143 | ------- 144 | 145 | fSAIS was implemented by: 146 | - [Dominik Kempa](https://scholar.google.com/citations?user=r0Kn9IUAAAAJ) 147 | - [Juha Karkkainen](https://scholar.google.com/citations?user=oZepo1cAAAAJ) 148 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 0.1.0 2 | -------------------------------------------------------------------------------- /src/fsais_src/em_induce_plus_suffixes.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file fsais_src/em_induce_plus_suffixes.hpp 3 | * @section LICENCE 4 | * 5 | * This file is part of fSAIS v0.1.0 6 | * See: https://github.com/dominikkempa/fsais 7 | * 8 | * Copyright (C) 2016-2020 9 | * Dominik Kempa 10 | * Juha Karkkainen 11 | * 12 | * Permission is hereby granted, free of charge, to any person 13 | * obtaining a copy of 
this software and associated documentation 14 | * files (the "Software"), to deal in the Software without 15 | * restriction, including without limitation the rights to use, 16 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | * copies of the Software, and to permit persons to whom the 18 | * Software is furnished to do so, subject to the following 19 | * conditions: 20 | * 21 | * The above copyright notice and this permission notice shall be 22 | * included in all copies or substantial portions of the Software. 23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 26 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 28 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 29 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 31 | * OTHER DEALINGS IN THE SOFTWARE. 
32 | **/ 33 | 34 | #ifndef __FSAIS_SRC_EM_INDUCE_PLUS_SUFFIXES_HPP_INCLUDED 35 | #define __FSAIS_SRC_EM_INDUCE_PLUS_SUFFIXES_HPP_INCLUDED 36 | 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | 44 | #include "io/async_stream_writer.hpp" 45 | #include "io/async_stream_writer_multipart.hpp" 46 | #include "io/async_multi_stream_reader_multipart.hpp" 47 | #include "io/async_multi_bit_stream_reader.hpp" 48 | #include "io/async_backward_stream_reader.hpp" 49 | #include "io/async_backward_stream_reader_multipart.hpp" 50 | #include "io/async_bit_stream_writer.hpp" 51 | 52 | #include "utils.hpp" 53 | #include "em_radix_heap.hpp" 54 | 55 | 56 | namespace fsais_private { 57 | 58 | template 61 | std::uint64_t em_induce_plus_suffixes( 62 | std::uint64_t text_alphabet_size, 63 | std::uint64_t text_length, 64 | std::uint64_t initial_text_length, 65 | std::uint64_t max_block_size, 66 | std::uint64_t ram_use, 67 | std::uint64_t minus_pos_n_parts, 68 | std::vector &block_count_target, 69 | std::string output_pos_filename, 70 | std::string output_type_filename, 71 | std::string output_count_filename, 72 | std::string minus_pos_filename, 73 | std::string minus_count_filename, 74 | std::vector &plus_type_filenames, 75 | std::vector &plus_pos_filenames, 76 | std::vector &symbols_filenames, 77 | std::uint64_t &total_io_volume) { 78 | std::uint64_t n_blocks = (text_length + max_block_size - 1) / max_block_size; 79 | 80 | if (text_length == 0) { 81 | fprintf(stderr, "\nError: text_length = 0\n"); 82 | std::exit(EXIT_FAILURE); 83 | } 84 | 85 | if (max_block_size == 0) { 86 | fprintf(stderr, "\nError: max_block_size = 0\n"); 87 | std::exit(EXIT_FAILURE); 88 | } 89 | 90 | if (text_alphabet_size == 0) { 91 | fprintf(stderr, "Error: text_alphabet_size = 0\n"); 92 | std::exit(EXIT_FAILURE); 93 | } 94 | 95 | if (n_blocks == 0) { 96 | fprintf(stderr, "\nError: n_blocks = 0\n"); 97 | std::exit(EXIT_FAILURE); 98 | } 99 | 100 | // Check that all types are 
sufficiently large. 101 | if ((std::uint64_t)std::numeric_limits::max() < text_alphabet_size - 1) { 102 | fprintf(stderr, "\nError: char_type in im_induce_minus_and_plus_suffixes too small!\n"); 103 | std::exit(EXIT_FAILURE); 104 | } 105 | if ((std::uint64_t)std::numeric_limits::max() < n_blocks - 1) { 106 | fprintf(stderr, "\nError: block_id_type in im_induce_minus_and_plus_suffixes_small too small!\n"); 107 | std::exit(EXIT_FAILURE); 108 | } 109 | if ((std::uint64_t)std::numeric_limits::max() < text_length * 2UL) { 110 | fprintf(stderr, "\nError: text_offset_type in im_induce_minus_and_plus_suffixes too small!\n"); 111 | std::exit(EXIT_FAILURE); 112 | } 113 | 114 | // Decide on the RAM budget allocation. 115 | std::uint64_t opt_buf_size = (1UL << 20); 116 | std::uint64_t computed_buf_size = 0; 117 | std::uint64_t n_buffers = 3 * n_blocks + 20; 118 | std::uint64_t ram_for_radix_heap = 0; 119 | std::uint64_t ram_for_buffers = 0; 120 | if (opt_buf_size * n_buffers <= ram_use / 2) { 121 | computed_buf_size = opt_buf_size; 122 | ram_for_buffers = computed_buf_size * n_buffers; 123 | ram_for_radix_heap = ram_use - ram_for_buffers; 124 | } else { 125 | ram_for_radix_heap = ram_use / 2; 126 | ram_for_buffers = ram_use - ram_for_radix_heap; 127 | computed_buf_size = std::max(1UL, ram_for_buffers / n_buffers); 128 | } 129 | 130 | // Start the timer. 131 | long double start = utils::wclock(); 132 | fprintf(stderr, " EM induce plus suffixes:\n"); 133 | fprintf(stderr, " Single buffer size = %lu (%.1LfMiB)\n", computed_buf_size, (1.L * computed_buf_size) / (1L << 20)); 134 | fprintf(stderr, " All buffers RAM budget = %lu (%.1LfMiB)\n", ram_for_buffers, (1.L * ram_for_buffers) / (1L << 20)); 135 | fprintf(stderr, " Radix heap RAM budget = %lu (%.1LfMiB)\n", ram_for_radix_heap, (1.L * ram_for_radix_heap) / (1L << 20)); 136 | 137 | 138 | // Initialize radix heap. 
139 | std::vector radix_logs; 140 | { 141 | std::uint64_t target_sum = 8UL * sizeof(char_type); 142 | std::uint64_t cur_sum = 0; 143 | while (cur_sum < target_sum) { 144 | std::uint64_t radix_log = std::min(10UL, target_sum - cur_sum); 145 | radix_logs.push_back(radix_log); 146 | cur_sum += radix_log; 147 | } 148 | } 149 | typedef em_radix_heap radix_heap_type; 150 | radix_heap_type *radix_heap = new radix_heap_type(radix_logs, output_pos_filename, ram_for_radix_heap); 151 | 152 | // Initialize readers of data associated with minus suffixes. 153 | typedef async_backward_stream_reader minus_count_reader_type; 154 | typedef async_backward_stream_reader_multipart minus_pos_reader_type; 155 | minus_count_reader_type *minus_count_reader = new minus_count_reader_type(minus_count_filename, 4UL * computed_buf_size, 4UL); 156 | 157 | #ifdef SAIS_DEBUG 158 | minus_pos_reader_type *minus_pos_reader = NULL; 159 | { 160 | std::uint64_t reader_buf_size = utils::random_int64(1L, 50L); 161 | std::uint64_t reader_n_bufs = utils::random_int64(1L, 5L); 162 | minus_pos_reader = new minus_pos_reader_type(minus_pos_filename, minus_pos_n_parts, reader_buf_size, reader_n_bufs); 163 | } 164 | #else 165 | minus_pos_reader_type *minus_pos_reader = new minus_pos_reader_type(minus_pos_filename, minus_pos_n_parts, 4UL * computed_buf_size, 4UL); 166 | #endif 167 | 168 | // Initialize readers of data associated with plus suffixes. 
169 | typedef async_multi_bit_stream_reader plus_type_reader_type; 170 | typedef async_multi_stream_reader_multipart plus_pos_reader_type; 171 | plus_type_reader_type *plus_type_reader = new plus_type_reader_type(n_blocks, computed_buf_size); 172 | plus_pos_reader_type *plus_pos_reader = new plus_pos_reader_type(n_blocks, computed_buf_size); 173 | for (std::uint64_t block_id = 0; block_id < n_blocks; ++block_id) { 174 | plus_type_reader->add_file(plus_type_filenames[block_id]); 175 | plus_pos_reader->add_file(plus_pos_filenames[block_id]); 176 | } 177 | 178 | // Initialize the readers of data associated with both types of suffixes. 179 | typedef async_multi_stream_reader_multipart symbols_reader_type; 180 | symbols_reader_type *symbols_reader = new symbols_reader_type(n_blocks, computed_buf_size); 181 | for (std::uint64_t block_id = 0; block_id < n_blocks; ++block_id) 182 | symbols_reader->add_file(symbols_filenames[block_id]); 183 | 184 | // Initialize output writers. 185 | #ifdef SAIS_DEBUG 186 | std::uint64_t max_part_size = utils::random_int64(1L, 50L); 187 | #else 188 | std::uint64_t max_part_size = std::max((1UL << 20), (text_length * sizeof(text_offset_type)) / 40UL); 189 | fprintf(stderr, " Max part size = %lu (%.1LfMiB)\n", max_part_size, (1.L * max_part_size) / (1UL << 20)); 190 | #endif 191 | 192 | typedef async_stream_writer_multipart output_pos_writer_type; 193 | typedef async_bit_stream_writer output_type_writer_type; 194 | typedef async_stream_writer output_count_writer_type; 195 | output_pos_writer_type *output_pos_writer = new output_pos_writer_type(output_pos_filename, max_part_size, 4UL * computed_buf_size, 4UL); 196 | output_type_writer_type *output_type_writer = new output_type_writer_type(output_type_filename, 4UL * computed_buf_size, 4UL); 197 | output_count_writer_type *output_count_writer = new output_count_writer_type(output_count_filename, 4UL * computed_buf_size, 4UL); 198 | 199 | bool empty_output = true; 200 | std::uint64_t max_char = 
std::numeric_limits::max(); 201 | std::uint64_t head_char = 0; 202 | { 203 | std::uint64_t size = utils::file_size(minus_count_filename); 204 | if (size > 0) 205 | head_char = size / sizeof(text_offset_type) - 1; 206 | } 207 | std::uint64_t prev_written_head_char = 0; 208 | std::uint64_t cur_bucket_size = 0; 209 | std::vector block_count(n_blocks, 0UL); 210 | 211 | // Induce plus suffixes. 212 | while (!radix_heap->empty() || !minus_count_reader->empty()) { 213 | 214 | // Process plus suffixes. 215 | while (!radix_heap->empty() && radix_heap->min_compare(max_char - head_char)) { 216 | std::pair p = radix_heap->extract_min(); 217 | std::uint64_t head_pos_block_id = p.second; 218 | std::uint64_t head_pos_block_beg = head_pos_block_id * max_block_size; 219 | std::uint64_t head_pos = head_pos_block_beg + plus_pos_reader->read_from_ith_file(head_pos_block_id); 220 | output_pos_writer->write(head_pos); 221 | 222 | bool is_head_pos_star = plus_type_reader->read_from_ith_file(head_pos_block_id); 223 | output_type_writer->write(is_head_pos_star); 224 | 225 | if (!empty_output) { 226 | if (head_char == prev_written_head_char) ++cur_bucket_size; 227 | else { 228 | output_count_writer->write(cur_bucket_size); 229 | for (std::uint64_t ch = prev_written_head_char; ch > head_char + 1; --ch) 230 | output_count_writer->write(0); 231 | cur_bucket_size = 1; 232 | prev_written_head_char = head_char; 233 | } 234 | } else { 235 | cur_bucket_size = 1; 236 | prev_written_head_char = head_char; 237 | } 238 | 239 | empty_output = false; 240 | if (head_pos > 0 && !is_head_pos_star) { 241 | std::uint64_t prev_pos_char = symbols_reader->read_from_ith_file(head_pos_block_id); 242 | std::uint64_t prev_pos_block_id = (head_pos_block_id * max_block_size == head_pos) ? head_pos_block_id - 1 : head_pos_block_id; 243 | radix_heap->push(max_char - prev_pos_char, prev_pos_block_id); 244 | } 245 | } 246 | 247 | // Process minus suffixes. 
248 | std::uint64_t minus_sufs_count = minus_count_reader->read(); 249 | for (std::uint64_t i = 0; i < minus_sufs_count; ++i) { 250 | std::uint64_t head_pos_block_id = minus_pos_reader->read(); 251 | ++block_count[head_pos_block_id]; 252 | bool pos_starts_at_block_beg = (block_count[head_pos_block_id] == block_count_target[head_pos_block_id]); 253 | std::uint64_t prev_pos_block_id = head_pos_block_id - pos_starts_at_block_beg; 254 | std::uint64_t prev_pos_char = symbols_reader->read_from_ith_file(head_pos_block_id); 255 | radix_heap->push(max_char - prev_pos_char, prev_pos_block_id); 256 | } 257 | 258 | // Update current symbol. 259 | --head_char; 260 | } 261 | 262 | if (empty_output == false) { 263 | output_count_writer->write(cur_bucket_size); 264 | for (std::uint64_t ch = prev_written_head_char; ch > 0; --ch) 265 | output_count_writer->write(0); 266 | } 267 | 268 | // Stop I/O thread. 269 | plus_pos_reader->stop_reading(); 270 | symbols_reader->stop_reading(); 271 | minus_pos_reader->stop_reading(); 272 | minus_count_reader->stop_reading(); 273 | output_type_writer->stop_writing(); 274 | 275 | // Update I/O volume. 276 | std::uint64_t io_volume = 277 | radix_heap->io_volume() + 278 | minus_pos_reader->bytes_read() + 279 | minus_count_reader->bytes_read() + 280 | plus_type_reader->bytes_read() + 281 | plus_pos_reader->bytes_read() + 282 | symbols_reader->bytes_read() + 283 | output_pos_writer->bytes_written() + 284 | output_type_writer->bytes_written() + 285 | output_count_writer->bytes_written(); 286 | total_io_volume += io_volume; 287 | 288 | // Compute return value. 289 | std::uint64_t n_parts = output_pos_writer->get_parts_count(); 290 | 291 | // Clean up. 292 | delete output_count_writer; 293 | delete output_type_writer; 294 | delete output_pos_writer; 295 | delete symbols_reader; 296 | delete plus_pos_reader; 297 | delete plus_type_reader; 298 | delete minus_pos_reader; 299 | delete minus_count_reader; 300 | delete radix_heap; 301 | 302 | // Print summary. 
303 | long double total_time = utils::wclock() - start; 304 | fprintf(stderr, " Time = %.2Lfs, I/O = %.2LfMiB/s, " 305 | "total I/O vol = %.1Lf bytes/symbol (of initial text)\n", 306 | total_time, (1.L * io_volume / (1L << 20)) / total_time, 307 | (1.L * total_io_volume) / initial_text_length); 308 | 309 | // Return the number of parts. 310 | return n_parts; 311 | } 312 | 313 | } // namespace fsais_private 314 | 315 | #endif // __FSAIS_SRC_EM_INDUCE_PLUS_SUFFIXES_HPP_INCLUDED 316 | -------------------------------------------------------------------------------- /src/fsais_src/io/async_backward_bit_stream_reader.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file fsais_src/io/async_backward_bit_stream_reader.hpp 3 | * @section LICENCE 4 | * 5 | * This file is part of fSAIS v0.1.0 6 | * See: https://github.com/dominikkempa/fsais 7 | * 8 | * Copyright (C) 2016-2020 9 | * Dominik Kempa 10 | * Juha Karkkainen 11 | * 12 | * Permission is hereby granted, free of charge, to any person 13 | * obtaining a copy of this software and associated documentation 14 | * files (the "Software"), to deal in the Software without 15 | * restriction, including without limitation the rights to use, 16 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | * copies of the Software, and to permit persons to whom the 18 | * Software is furnished to do so, subject to the following 19 | * conditions: 20 | * 21 | * The above copyright notice and this permission notice shall be 22 | * included in all copies or substantial portions of the Software. 23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 26 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 28 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 29 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 31 | * OTHER DEALINGS IN THE SOFTWARE. 32 | **/ 33 | 34 | #ifndef __FSAIS_SRC_IO_ASYNC_BACKWARD_BIT_STREAM_READER_HPP_INCLUDED 35 | #define __FSAIS_SRC_IO_ASYNC_BACKWARD_BIT_STREAM_READER_HPP_INCLUDED 36 | 37 | #include 38 | #include 39 | 40 | #include "async_backward_stream_reader.hpp" 41 | 42 | 43 | namespace fsais_private { 44 | 45 | class async_backward_bit_stream_reader { 46 | private: 47 | typedef async_backward_stream_reader internal_reader_type; 48 | internal_reader_type *m_internal_reader; 49 | 50 | std::uint64_t m_data; 51 | std::uint64_t m_pos; 52 | bool m_is_filled; 53 | 54 | public: 55 | async_backward_bit_stream_reader(std::string filename, 56 | std::uint64_t total_buf_size_items = (8UL << 20), 57 | std::uint64_t n_buffers = 4) { 58 | m_internal_reader = new internal_reader_type(filename, 59 | total_buf_size_items, n_buffers); 60 | m_data = 0; 61 | m_pos = 0; 62 | m_is_filled = false; 63 | } 64 | 65 | inline std::uint8_t read() { 66 | if (m_is_filled == false) { 67 | std::uint64_t bit_cnt = m_internal_reader->read(); 68 | m_pos = bit_cnt % 64; 69 | if (m_pos == 0) 70 | m_pos = 64; 71 | m_data = m_internal_reader->read(); 72 | m_is_filled = true; 73 | } else if (m_pos == 0) { 74 | m_data = m_internal_reader->read(); 75 | m_pos = 64; 76 | } 77 | 78 | return (m_data & (1UL << (--m_pos))) > 0; 79 | } 80 | 81 | void stop_reading() { 82 | m_internal_reader->stop_reading(); 83 | } 84 | 85 | inline std::uint64_t bytes_read() const { 86 | return m_internal_reader->bytes_read(); 87 | } 88 | 89 | ~async_backward_bit_stream_reader() { 90 | delete m_internal_reader; 91 | } 92 | }; 93 | 94 | } // namespace fsais_private 95 | 96 | #endif // __FSAIS_SRC_IO_ASYNC_BACKWARD_BIT_STREAM_READER_HPP_INCLUDED 97 | 
-------------------------------------------------------------------------------- /src/fsais_src/io/async_backward_stream_reader.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file fsais_src/io/async_backward_stream_reader.hpp 3 | * @section LICENCE 4 | * 5 | * This file is part of fSAIS v0.1.0 6 | * See: https://github.com/dominikkempa/fsais 7 | * 8 | * Copyright (C) 2016-2020 9 | * Dominik Kempa 10 | * Juha Karkkainen 11 | * 12 | * Permission is hereby granted, free of charge, to any person 13 | * obtaining a copy of this software and associated documentation 14 | * files (the "Software"), to deal in the Software without 15 | * restriction, including without limitation the rights to use, 16 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | * copies of the Software, and to permit persons to whom the 18 | * Software is furnished to do so, subject to the following 19 | * conditions: 20 | * 21 | * The above copyright notice and this permission notice shall be 22 | * included in all copies or substantial portions of the Software. 23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 26 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 28 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 29 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 31 | * OTHER DEALINGS IN THE SOFTWARE. 
32 | **/ 33 | 34 | #ifndef __FSAIS_SRC_IO_ASYNC_BACKWARD_STREAM_READER_HPP_INCLUDED 35 | #define __FSAIS_SRC_IO_ASYNC_BACKWARD_STREAM_READER_HPP_INCLUDED 36 | 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | 46 | #include "../utils.hpp" 47 | 48 | 49 | namespace fsais_private { 50 | 51 | template 52 | class async_backward_stream_reader { 53 | private: 54 | template 55 | struct buffer { 56 | buffer(std::uint64_t size, T* const mem) 57 | : m_content(mem), m_size(size) { 58 | m_filled = 0; 59 | } 60 | 61 | void read_from_file(std::FILE *f) { 62 | std::uint64_t filepos = std::ftell(f); 63 | if (filepos == 0) m_filled = 0; 64 | else { 65 | m_filled = std::min(m_size, filepos / sizeof(T)); 66 | std::fseek(f, -1UL * m_filled * sizeof(T), SEEK_CUR); 67 | utils::read_from_file(m_content, m_filled, f); 68 | std::fseek(f, -1UL * m_filled * sizeof(T), SEEK_CUR); 69 | } 70 | } 71 | 72 | inline std::uint64_t size_in_bytes() const { 73 | return sizeof(T) * m_filled; 74 | } 75 | 76 | inline bool empty() const { 77 | return (m_filled == 0); 78 | } 79 | 80 | inline void set_empty() { 81 | m_filled = 0; 82 | } 83 | 84 | T* const m_content; 85 | const std::uint64_t m_size; 86 | 87 | std::uint64_t m_filled; 88 | }; 89 | 90 | template 91 | struct circular_queue { 92 | private: 93 | std::uint64_t m_size; 94 | std::uint64_t m_filled; 95 | std::uint64_t m_head; 96 | std::uint64_t m_tail; 97 | T *m_data; 98 | 99 | public: 100 | circular_queue() 101 | : m_size(1), 102 | m_filled(0), 103 | m_head(0), 104 | m_tail(0), 105 | m_data(new T[m_size]) {} 106 | 107 | inline void push(T x) { 108 | m_data[m_head++] = x; 109 | if (m_head == m_size) 110 | m_head = 0; 111 | ++m_filled; 112 | if (m_filled == m_size) 113 | enlarge(); 114 | } 115 | 116 | inline T &front() const { 117 | return m_data[m_tail]; 118 | } 119 | 120 | inline void pop() { 121 | ++m_tail; 122 | if (m_tail == m_size) 123 | m_tail = 0; 124 | --m_filled; 125 | } 126 | 
127 | inline bool empty() const { 128 | return (m_filled == 0); 129 | } 130 | 131 | inline std::uint64_t size() const { 132 | return m_filled; 133 | } 134 | 135 | ~circular_queue() { 136 | delete[] m_data; 137 | } 138 | 139 | private: 140 | void enlarge() { 141 | T *new_data = new T[2 * m_size]; 142 | std::uint64_t left = m_filled; 143 | m_filled = 0; 144 | 145 | while (left > 0) { 146 | std::uint64_t tocopy = std::min(left, m_size - m_tail); 147 | std::copy(m_data + m_tail, 148 | m_data + m_tail + tocopy, new_data + m_filled); 149 | 150 | m_tail += tocopy; 151 | if (m_tail == m_size) 152 | m_tail = 0; 153 | left -= tocopy; 154 | m_filled += tocopy; 155 | } 156 | 157 | m_head = m_filled; 158 | m_tail = 0; 159 | m_size <<= 1; 160 | std::swap(m_data, new_data); 161 | delete[] new_data; 162 | } 163 | }; 164 | 165 | template 166 | struct buffer_queue { 167 | typedef buffer buffer_type; 168 | 169 | buffer_queue( 170 | std::uint64_t n_buffers, 171 | std::uint64_t items_per_buf, 172 | T *mem) { 173 | m_signal_stop = false; 174 | for (std::uint64_t i = 0; i < n_buffers; ++i) { 175 | m_queue.push(new buffer_type(items_per_buf, mem)); 176 | mem += items_per_buf; 177 | } 178 | } 179 | 180 | ~buffer_queue() { 181 | while (!m_queue.empty()) { 182 | buffer_type *buf = m_queue.front(); 183 | m_queue.pop(); 184 | delete buf; 185 | } 186 | } 187 | 188 | buffer_type *pop() { 189 | buffer_type *ret = m_queue.front(); 190 | m_queue.pop(); 191 | return ret; 192 | } 193 | 194 | void push(buffer_type *buf) { 195 | std::lock_guard lk(m_mutex); 196 | m_queue.push(buf); 197 | } 198 | 199 | void send_stop_signal() { 200 | std::lock_guard lk(m_mutex); 201 | m_signal_stop = true; 202 | } 203 | 204 | inline bool empty() const { 205 | return m_queue.empty(); 206 | } 207 | 208 | circular_queue m_queue; // Must have FIFO property 209 | std::condition_variable m_cv; 210 | std::mutex m_mutex; 211 | bool m_signal_stop; 212 | }; 213 | 214 | private: 215 | typedef buffer buffer_type; 216 | typedef 
buffer_queue buffer_queue_type; 217 | 218 | buffer_queue_type *m_empty_buffers; 219 | buffer_queue_type *m_full_buffers; 220 | 221 | private: 222 | template 223 | static void io_thread_code( 224 | async_backward_stream_reader *caller) { 225 | typedef buffer buffer_type; 226 | while (true) { 227 | 228 | // Wait for an empty buffer (or a stop signal). 229 | std::unique_lock lk(caller->m_empty_buffers->m_mutex); 230 | while (caller->m_empty_buffers->empty() && 231 | !(caller->m_empty_buffers->m_signal_stop)) 232 | caller->m_empty_buffers->m_cv.wait(lk); 233 | 234 | if (caller->m_empty_buffers->empty()) { 235 | 236 | // We received the stop signal -- exit. 237 | lk.unlock(); 238 | break; 239 | } 240 | 241 | // Extract the buffer from the queue. 242 | buffer_type *buffer = caller->m_empty_buffers->pop(); 243 | lk.unlock(); 244 | 245 | // Read the data from disk. 246 | buffer->read_from_file(caller->m_file); 247 | if (buffer->empty()) { 248 | 249 | // If we reached the end of file, 250 | // reinsert the buffer into the queue 251 | // of empty buffers and exit. 252 | caller->m_empty_buffers->push(buffer); 253 | caller->m_full_buffers->send_stop_signal(); 254 | caller->m_full_buffers->m_cv.notify_one(); 255 | break; 256 | } else { 257 | 258 | // Update the number of bytes read. 259 | caller->m_bytes_read += buffer->size_in_bytes(); 260 | 261 | // Add the buffer to the queue of filled buffers. 262 | caller->m_full_buffers->push(buffer); 263 | caller->m_full_buffers->m_cv.notify_one(); 264 | } 265 | } 266 | } 267 | 268 | public: 269 | void receive_new_buffer() { 270 | 271 | // Push the current buffer back to the poll of empty buffers. 272 | if (m_cur_buffer != NULL) { 273 | m_cur_buffer->set_empty(); 274 | m_empty_buffers->push(m_cur_buffer); 275 | m_empty_buffers->m_cv.notify_one(); 276 | m_cur_buffer = NULL; 277 | } 278 | 279 | // Extract a filled buffer. 
280 | std::unique_lock lk(m_full_buffers->m_mutex); 281 | while (m_full_buffers->empty() && !(m_full_buffers->m_signal_stop)) 282 | m_full_buffers->m_cv.wait(lk); 283 | if (m_full_buffers->empty()) { 284 | lk.unlock(); 285 | m_cur_buffer_filled = 0; 286 | } else { 287 | m_cur_buffer = m_full_buffers->pop(); 288 | lk.unlock(); 289 | m_cur_buffer_filled = m_cur_buffer->m_filled; 290 | } 291 | m_cur_buffer_pos = m_cur_buffer_filled; 292 | } 293 | 294 | private: 295 | std::FILE *m_file; 296 | std::uint64_t m_bytes_read; 297 | std::uint64_t m_cur_buffer_pos; 298 | std::uint64_t m_cur_buffer_filled; 299 | 300 | value_type *m_mem; 301 | buffer_type *m_cur_buffer; 302 | std::thread *m_io_thread; 303 | 304 | public: 305 | 306 | // Constructor, default buffer sizes, no skip. 307 | async_backward_stream_reader(std::string filename) { 308 | init(filename, (8UL << 20), 4UL, 0UL); 309 | } 310 | 311 | // Constructor, default buffer sizes, given skip. 312 | async_backward_stream_reader(std::string filename, 313 | std::uint64_t n_skip_bytes) { 314 | init(filename, (8UL << 20), 4UL, n_skip_bytes); 315 | } 316 | 317 | // Constructor, no skip, given buffer sizes. 318 | async_backward_stream_reader(std::string filename, 319 | std::uint64_t total_buf_size_items, std::uint64_t n_buffers) { 320 | init(filename, total_buf_size_items, n_buffers, 0UL); 321 | } 322 | 323 | // Constructor, given buffer sizes and skip. 324 | async_backward_stream_reader(std::string filename, 325 | std::uint64_t total_buf_size_items, std::uint64_t n_buffers, 326 | std::uint64_t n_skip_bytes) { 327 | init(filename, total_buf_size_items, n_buffers, n_skip_bytes); 328 | } 329 | 330 | // Main initializing function. 
331 | void init(std::string filename, std::uint64_t total_buf_size_bytes, 332 | std::uint64_t n_buffers, std::uint64_t n_skip_bytes) { 333 | if (n_buffers == 0) { 334 | fprintf(stderr, "\nError in async_backward_stream_reader: n_buffers == 0\n"); 335 | std::exit(EXIT_FAILURE); 336 | } 337 | 338 | // Open input file. 339 | m_file = utils::file_open_nobuf(filename.c_str(), "r"); 340 | 341 | // Reposition the file pointer if necessary. 342 | std::fseek(m_file, 0, SEEK_END); 343 | if (n_skip_bytes > 0) 344 | std::fseek(m_file, -1UL * n_skip_bytes, SEEK_CUR); 345 | 346 | // Initialize counters. 347 | m_bytes_read = 0; 348 | m_cur_buffer_pos = 0; 349 | m_cur_buffer_filled = 0; 350 | m_cur_buffer = NULL; 351 | 352 | // Computer optimal buffer size. 353 | std::uint64_t buf_size_bytes = 354 | std::max((std::uint64_t)1, total_buf_size_bytes / n_buffers); 355 | std::uint64_t items_per_buf = 356 | utils::disk_block_size(buf_size_bytes); 357 | 358 | // Allocate buffers. 359 | m_mem = utils::allocate_array(n_buffers * items_per_buf); 360 | m_empty_buffers = new buffer_queue_type(n_buffers, items_per_buf, m_mem); 361 | m_full_buffers = new buffer_queue_type(0, 0, NULL); 362 | 363 | // Start the I/O thread. 364 | m_io_thread = new std::thread(io_thread_code, this); 365 | } 366 | 367 | // Return the next item in the stream. 368 | inline value_type read() { 369 | if (m_cur_buffer_pos == 0) 370 | receive_new_buffer(); 371 | 372 | return m_cur_buffer->m_content[--m_cur_buffer_pos]; 373 | } 374 | 375 | // Read 'howmany' items into 'dest'. 
376 | void read(value_type *dest, std::uint64_t howmany) { 377 | while (howmany > 0) { 378 | if (m_cur_buffer_pos == 0) 379 | receive_new_buffer(); 380 | 381 | std::uint64_t cur_buf_left = m_cur_buffer_pos; 382 | std::uint64_t tocopy = std::min(howmany, cur_buf_left); 383 | for (std::uint64_t i = 0; i < tocopy; ++i) 384 | dest[i] = m_cur_buffer->m_content[m_cur_buffer_pos - 1 - i]; 385 | m_cur_buffer_pos -= tocopy; 386 | dest += tocopy; 387 | howmany -= tocopy; 388 | } 389 | } 390 | 391 | // Return the next item in the stream. 392 | inline value_type peek() { 393 | if (m_cur_buffer_pos == 0) 394 | receive_new_buffer(); 395 | 396 | return m_cur_buffer->m_content[m_cur_buffer_pos - 1]; 397 | } 398 | 399 | // True iff there are no more items in the stream. 400 | inline bool empty() { 401 | if (m_cur_buffer_pos == 0) 402 | receive_new_buffer(); 403 | 404 | return (m_cur_buffer_pos == 0); 405 | } 406 | 407 | // Return the performed I/O in bytes. Unlike in the 408 | // writer classes (where m_bytes_written is updated 409 | // in the write methods), here m_bytes_read is updated 410 | // in the I/O thread. This is to correctly account 411 | // for the read-ahead operations in cases where user 412 | // did not read the whole file. In those cases, however, 413 | // the user must call the stop_reading() method before 414 | // calling bytes_read() to obtain the correct result. 415 | inline std::uint64_t bytes_read() const { 416 | return m_bytes_read; 417 | } 418 | 419 | // Stop the I/O thread, now the user can 420 | // cafely call the bytes_read() method. 421 | void stop_reading() { 422 | if (m_io_thread != NULL) { 423 | m_empty_buffers->send_stop_signal(); 424 | m_empty_buffers->m_cv.notify_one(); 425 | m_io_thread->join(); 426 | delete m_io_thread; 427 | m_io_thread = NULL; 428 | } 429 | } 430 | 431 | // Destructor. 432 | ~async_backward_stream_reader() { 433 | stop_reading(); 434 | 435 | // Clean up. 
436 | delete m_empty_buffers; 437 | delete m_full_buffers; 438 | if (m_file != stdin) 439 | std::fclose(m_file); 440 | 441 | if (m_cur_buffer != NULL) 442 | delete m_cur_buffer; 443 | 444 | utils::deallocate(m_mem); 445 | } 446 | }; 447 | 448 | } // namespace fsais_private 449 | 450 | #endif // __FSAIS_SRC_IO_ASYNC_BACKWARD_STREAM_READER_HPP_INCLUDED 451 | -------------------------------------------------------------------------------- /src/fsais_src/io/async_backward_stream_reader_multipart.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file fsais_src/io/async_backward_stream_writer_multipart.hpp 3 | * @section LICENCE 4 | * 5 | * This file is part of fSAIS v0.1.0 6 | * See: https://github.com/dominikkempa/fsais 7 | * 8 | * Copyright (C) 2016-2020 9 | * Dominik Kempa 10 | * Juha Karkkainen 11 | * 12 | * Permission is hereby granted, free of charge, to any person 13 | * obtaining a copy of this software and associated documentation 14 | * files (the "Software"), to deal in the Software without 15 | * restriction, including without limitation the rights to use, 16 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | * copies of the Software, and to permit persons to whom the 18 | * Software is furnished to do so, subject to the following 19 | * conditions: 20 | * 21 | * The above copyright notice and this permission notice shall be 22 | * included in all copies or substantial portions of the Software. 23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 26 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 28 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 29 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 31 | * OTHER DEALINGS IN THE SOFTWARE. 32 | **/ 33 | 34 | #ifndef __FSAIS_SRC_IO_ASYNC_BACKWARD_STREAM_READER_MULTIPART_HPP_INCLUDED 35 | #define __FSAIS_SRC_IO_ASYNC_BACKWARD_STREAM_READER_MULTIPART_HPP_INCLUDED 36 | 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | 46 | #include "../utils.hpp" 47 | 48 | 49 | namespace fsais_private { 50 | 51 | template 52 | class async_backward_stream_reader_multipart { 53 | private: 54 | template 55 | struct buffer { 56 | buffer(std::uint64_t size, T* const mem) 57 | : m_content(mem), m_size(size) { 58 | m_filled = 0; 59 | } 60 | 61 | bool read_from_file(std::FILE *f) { 62 | std::uint64_t filepos = std::ftell(f); 63 | if (filepos == 0) m_filled = 0; 64 | else { 65 | m_filled = std::min(m_size, filepos / sizeof(T)); 66 | std::fseek(f, -1UL * m_filled * sizeof(T), SEEK_CUR); 67 | utils::read_from_file(m_content, m_filled, f); 68 | std::fseek(f, -1UL * m_filled * sizeof(T), SEEK_CUR); 69 | } 70 | 71 | return (filepos == m_filled * sizeof(T)); 72 | } 73 | 74 | inline std::uint64_t size_in_bytes() const { 75 | return sizeof(T) * m_filled; 76 | } 77 | 78 | inline bool empty() const { 79 | return (m_filled == 0); 80 | } 81 | 82 | inline void set_empty() { 83 | m_filled = 0; 84 | } 85 | 86 | T* const m_content; 87 | const std::uint64_t m_size; 88 | 89 | std::uint64_t m_filled; 90 | }; 91 | 92 | template 93 | struct circular_queue { 94 | private: 95 | std::uint64_t m_size; 96 | std::uint64_t m_filled; 97 | std::uint64_t m_head; 98 | std::uint64_t m_tail; 99 | T *m_data; 100 | 101 | public: 102 | circular_queue() 103 | : m_size(1), 104 | m_filled(0), 105 | m_head(0), 106 | m_tail(0), 107 | m_data(new T[m_size]) {} 108 | 109 | 
inline void push(T x) { 110 | m_data[m_head++] = x; 111 | if (m_head == m_size) 112 | m_head = 0; 113 | ++m_filled; 114 | if (m_filled == m_size) 115 | enlarge(); 116 | } 117 | 118 | inline T &front() const { 119 | return m_data[m_tail]; 120 | } 121 | 122 | inline void pop() { 123 | ++m_tail; 124 | if (m_tail == m_size) 125 | m_tail = 0; 126 | --m_filled; 127 | } 128 | 129 | inline bool empty() const { 130 | return (m_filled == 0); 131 | } 132 | 133 | inline std::uint64_t size() const { 134 | return m_filled; 135 | } 136 | 137 | ~circular_queue() { 138 | delete[] m_data; 139 | } 140 | 141 | private: 142 | void enlarge() { 143 | T *new_data = new T[2 * m_size]; 144 | std::uint64_t left = m_filled; 145 | m_filled = 0; 146 | 147 | while (left > 0) { 148 | std::uint64_t tocopy = std::min(left, m_size - m_tail); 149 | std::copy(m_data + m_tail, 150 | m_data + m_tail + tocopy, new_data + m_filled); 151 | 152 | m_tail += tocopy; 153 | if (m_tail == m_size) 154 | m_tail = 0; 155 | left -= tocopy; 156 | m_filled += tocopy; 157 | } 158 | 159 | m_head = m_filled; 160 | m_tail = 0; 161 | m_size <<= 1; 162 | std::swap(m_data, new_data); 163 | delete[] new_data; 164 | } 165 | }; 166 | 167 | template 168 | struct buffer_queue { 169 | typedef buffer buffer_type; 170 | 171 | buffer_queue( 172 | std::uint64_t n_buffers, 173 | std::uint64_t items_per_buf, 174 | T *mem) { 175 | m_signal_stop = false; 176 | for (std::uint64_t i = 0; i < n_buffers; ++i) { 177 | m_queue.push(new buffer_type(items_per_buf, mem)); 178 | mem += items_per_buf; 179 | } 180 | } 181 | 182 | ~buffer_queue() { 183 | while (!m_queue.empty()) { 184 | buffer_type *buf = m_queue.front(); 185 | m_queue.pop(); 186 | delete buf; 187 | } 188 | } 189 | 190 | buffer_type *pop() { 191 | buffer_type *ret = m_queue.front(); 192 | m_queue.pop(); 193 | return ret; 194 | } 195 | 196 | void push(buffer_type *buf) { 197 | std::lock_guard lk(m_mutex); 198 | m_queue.push(buf); 199 | } 200 | 201 | void send_stop_signal() { 202 | 
std::lock_guard lk(m_mutex); 203 | m_signal_stop = true; 204 | } 205 | 206 | inline bool empty() const { 207 | return m_queue.empty(); 208 | } 209 | 210 | circular_queue m_queue; // Must have FIFO property 211 | std::condition_variable m_cv; 212 | std::mutex m_mutex; 213 | bool m_signal_stop; 214 | }; 215 | 216 | private: 217 | typedef buffer buffer_type; 218 | typedef buffer_queue buffer_queue_type; 219 | 220 | buffer_queue_type *m_empty_buffers; 221 | buffer_queue_type *m_full_buffers; 222 | 223 | private: 224 | template 225 | static void io_thread_code( 226 | async_backward_stream_reader_multipart *caller) { 227 | typedef buffer buffer_type; 228 | while (true) { 229 | 230 | // Wait for an empty buffer (or a stop signal). 231 | std::unique_lock lk(caller->m_empty_buffers->m_mutex); 232 | while (caller->m_empty_buffers->empty() && 233 | !(caller->m_empty_buffers->m_signal_stop)) 234 | caller->m_empty_buffers->m_cv.wait(lk); 235 | 236 | if (caller->m_empty_buffers->empty()) { 237 | 238 | // We received the stop signal -- exit. 239 | lk.unlock(); 240 | break; 241 | } 242 | 243 | // Extract the buffer from the queue. 244 | buffer_type *buffer = caller->m_empty_buffers->pop(); 245 | lk.unlock(); 246 | 247 | if (caller->m_file == NULL) { 248 | std::string cur_part_filename = caller->m_filename + ".multipart_file.part" + utils::intToStr(caller->m_parts_left - 1); 249 | caller->m_file = utils::file_open(cur_part_filename, "r"); 250 | std::fseek(caller->m_file, 0, SEEK_END); 251 | } 252 | 253 | bool no_more_data = buffer->read_from_file(caller->m_file); 254 | if (buffer->empty()) { 255 | 256 | // Here we assume that any multipart writer produces 257 | // zero files, if no write operation was called. 258 | fprintf(stderr, "\nError: empty buffer in async_backward_stream_reader_multipart!\n"); 259 | std::exit(EXIT_FAILURE); 260 | } 261 | 262 | // Add the buffer to the queue of filled buffers. 
263 | caller->m_full_buffers->push(buffer); 264 | caller->m_full_buffers->m_cv.notify_one(); 265 | 266 | if (no_more_data) { 267 | 268 | // We reached the beginning of file. 269 | std::fclose(caller->m_file); 270 | caller->m_file = NULL; 271 | std::string cur_part_filename = caller->m_filename + ".multipart_file.part" + utils::intToStr(caller->m_parts_left - 1); 272 | utils::file_delete(cur_part_filename); 273 | --caller->m_parts_left; 274 | if (caller->m_parts_left == 0) { 275 | caller->m_full_buffers->send_stop_signal(); 276 | caller->m_full_buffers->m_cv.notify_one(); 277 | break; 278 | } 279 | } 280 | } 281 | } 282 | 283 | public: 284 | void receive_new_buffer() { 285 | 286 | // Push the current buffer back to the poll of empty buffers. 287 | if (m_cur_buffer != NULL) { 288 | m_cur_buffer->set_empty(); 289 | m_empty_buffers->push(m_cur_buffer); 290 | m_empty_buffers->m_cv.notify_one(); 291 | m_cur_buffer = NULL; 292 | } 293 | 294 | // Extract a filled buffer. 295 | std::unique_lock lk(m_full_buffers->m_mutex); 296 | while (m_full_buffers->empty() && !(m_full_buffers->m_signal_stop)) 297 | m_full_buffers->m_cv.wait(lk); 298 | if (!m_full_buffers->empty()) { 299 | m_cur_buffer = m_full_buffers->pop(); 300 | m_cur_buffer_left = m_cur_buffer->m_filled; 301 | } 302 | lk.unlock(); 303 | } 304 | 305 | private: 306 | std::uint64_t m_bytes_read; 307 | std::uint64_t m_parts_left; 308 | std::uint64_t m_cur_buffer_left; 309 | 310 | std::FILE *m_file; 311 | std::string m_filename; 312 | 313 | value_type *m_mem; 314 | buffer_type *m_cur_buffer; 315 | std::thread *m_io_thread; 316 | 317 | public: 318 | async_backward_stream_reader_multipart(std::string filename, 319 | std::uint64_t parts_count) { 320 | init(filename, parts_count, (8UL << 20), 4UL); 321 | } 322 | 323 | async_backward_stream_reader_multipart(std::string filename, 324 | std::uint64_t parts_count, 325 | std::uint64_t total_buf_size_bytes, 326 | std::uint64_t n_buffers) { 327 | init(filename, parts_count, 
total_buf_size_bytes, n_buffers); 328 | } 329 | 330 | void init(std::string filename, 331 | std::uint64_t parts_count, 332 | std::uint64_t total_buf_size_bytes, 333 | std::uint64_t n_buffers) { 334 | if (n_buffers == 0) { 335 | fprintf(stderr, "\nError in async_backward_stream_reader_multipart: n_buffers == 0\n"); 336 | std::exit(EXIT_FAILURE); 337 | } 338 | 339 | // Initialize basic parameters. 340 | m_bytes_read = 0; 341 | m_cur_buffer_left = 0; 342 | m_parts_left = parts_count; 343 | m_cur_buffer = NULL; 344 | m_file = NULL; 345 | m_filename = filename; 346 | 347 | // Computer optimal buffer size. 348 | std::uint64_t buf_size_bytes = 349 | std::max((std::uint64_t)1, total_buf_size_bytes / n_buffers); 350 | std::uint64_t items_per_buf = 351 | utils::disk_block_size(buf_size_bytes); 352 | 353 | // Allocate buffers. 354 | m_mem = utils::allocate_array(n_buffers * items_per_buf); 355 | m_empty_buffers = new buffer_queue_type(n_buffers, items_per_buf, m_mem); 356 | m_full_buffers = new buffer_queue_type(0, 0, NULL); 357 | 358 | // Start the I/O thread. 359 | if (m_parts_left > 0) 360 | m_io_thread = new std::thread(io_thread_code, this); 361 | else m_io_thread = NULL; 362 | } 363 | 364 | // Return the next item in the stream. 365 | inline value_type read() { 366 | m_bytes_read += sizeof(value_type); // XXX why here? 367 | if (m_cur_buffer_left == 0) 368 | receive_new_buffer(); 369 | 370 | return m_cur_buffer->m_content[--m_cur_buffer_left]; 371 | } 372 | 373 | // Return the performed I/O in bytes. 374 | inline std::uint64_t bytes_read() const { 375 | return m_bytes_read; 376 | } 377 | 378 | // Stop the I/O thread, now the user can 379 | // cafely call the bytes_read() method. 380 | void stop_reading() { 381 | if (m_io_thread != NULL) { 382 | m_empty_buffers->send_stop_signal(); 383 | m_empty_buffers->m_cv.notify_one(); 384 | m_io_thread->join(); 385 | delete m_io_thread; 386 | m_io_thread = NULL; 387 | } 388 | } 389 | 390 | // Destructor. 
391 | ~async_backward_stream_reader_multipart() { 392 | stop_reading(); 393 | 394 | // Clean up. 395 | delete m_empty_buffers; 396 | delete m_full_buffers; 397 | if (m_file != NULL) { 398 | fprintf(stderr, "\nError: m_file != NULL when destroying multipart backward stream reader!\n"); 399 | fprintf(stderr, "Most likely, not all items were read from the file!\n"); 400 | std::exit(EXIT_FAILURE); 401 | } 402 | 403 | if (m_cur_buffer != NULL) 404 | delete m_cur_buffer; 405 | 406 | utils::deallocate(m_mem); 407 | } 408 | }; 409 | 410 | } // namespace fsais_private 411 | 412 | #endif // __FSAIS_SRC_IO_ASYNC_BACKWARD_STREAM_READER_MULTIPART_HPP_INCLUDED 413 | -------------------------------------------------------------------------------- /src/fsais_src/io/async_bit_stream_writer.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file fsais_src/io/async_bit_stream_writer.hpp 3 | * @section LICENCE 4 | * 5 | * This file is part of fSAIS v0.1.0 6 | * See: https://github.com/dominikkempa/fsais 7 | * 8 | * Copyright (C) 2016-2020 9 | * Dominik Kempa 10 | * Juha Karkkainen 11 | * 12 | * Permission is hereby granted, free of charge, to any person 13 | * obtaining a copy of this software and associated documentation 14 | * files (the "Software"), to deal in the Software without 15 | * restriction, including without limitation the rights to use, 16 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | * copies of the Software, and to permit persons to whom the 18 | * Software is furnished to do so, subject to the following 19 | * conditions: 20 | * 21 | * The above copyright notice and this permission notice shall be 22 | * included in all copies or substantial portions of the Software. 
23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 26 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 28 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 29 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 31 | * OTHER DEALINGS IN THE SOFTWARE. 32 | **/ 33 | 34 | #ifndef __FSAIS_SRC_IO_ASYNC_BIT_STREAM_WRITER_HPP_INCLUDED 35 | #define __FSAIS_SRC_IO_ASYNC_BIT_STREAM_WRITER_HPP_INCLUDED 36 | 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | 45 | #include "../utils.hpp" 46 | 47 | 48 | namespace fsais_private { 49 | 50 | class async_bit_stream_writer { 51 | private: 52 | static void io_thread_code(async_bit_stream_writer *writer) { 53 | while (true) { 54 | 55 | // Wait until the passive buffer is available. 56 | std::unique_lock lk(writer->m_mutex); 57 | while (!(writer->m_avail) && !(writer->m_finished)) 58 | writer->m_cv.wait(lk); 59 | 60 | if (!(writer->m_avail) && (writer->m_finished)) { 61 | 62 | // We're done, terminate the thread. 63 | lk.unlock(); 64 | return; 65 | } 66 | lk.unlock(); 67 | 68 | // Safely write the data to disk. 69 | utils::write_to_file(writer->m_passive_buf, 70 | writer->m_passive_buf_filled, writer->m_file); 71 | 72 | // Let the caller know that the I/O thread finished writing. 73 | lk.lock(); 74 | writer->m_avail = false; 75 | lk.unlock(); 76 | writer->m_cv.notify_one(); 77 | } 78 | } 79 | 80 | // Passes on the active buffer (full, unless it's the last one, 81 | // partially filled, buffer passed from destructor) to the I/O thread. 82 | void send_active_buf_to_write() { 83 | 84 | // Wait until the I/O thread finishes writing the previous buffer. 
85 | std::unique_lock lk(m_mutex); 86 | while (m_avail == true) 87 | m_cv.wait(lk); 88 | 89 | // Set the new passive buffer. 90 | std::swap(m_active_buf, m_passive_buf); 91 | m_passive_buf_filled = m_active_buf_filled; 92 | m_active_buf_filled = 0; 93 | m_bit_pos = 0; 94 | m_active_buf[0] = 0; 95 | 96 | // Let the I/O thread know that the buffer is waiting. 97 | m_avail = true; 98 | lk.unlock(); 99 | m_cv.notify_one(); 100 | } 101 | 102 | public: 103 | async_bit_stream_writer( 104 | std::string filename, 105 | std::uint64_t buf_size_bytes, 106 | std::uint64_t n_buffers) { 107 | 108 | (void)n_buffers; // unused now. 109 | m_file = utils::file_open_nobuf(filename.c_str(), "w"); 110 | 111 | // Compute optimal buffer size. 112 | buf_size_bytes = std::max((std::uint64_t)1, buf_size_bytes / 2); 113 | m_items_per_buf = 114 | utils::disk_block_size(buf_size_bytes); 115 | 116 | // Allocate buffers. 117 | m_mem = utils::allocate_array(2UL * m_items_per_buf); 118 | m_active_buf = m_mem; 119 | m_passive_buf = m_mem + m_items_per_buf; 120 | 121 | m_active_buf[0] = 0; 122 | m_bit_pos = 0; 123 | m_active_buf_filled = 0; 124 | m_passive_buf_filled = 0; 125 | m_bits_written = 0; 126 | 127 | // Start the I/O thread. 128 | m_avail = false; 129 | m_finished = false; 130 | m_thread = new std::thread(io_thread_code, this); 131 | } 132 | 133 | ~async_bit_stream_writer() { 134 | 135 | // Clean up. 136 | delete m_thread; 137 | std::fclose(m_file); 138 | utils::deallocate(m_mem); 139 | } 140 | 141 | inline void write(std::uint8_t bit) { 142 | ++m_bits_written; 143 | m_active_buf[m_active_buf_filled] |= ((std::uint64_t)bit << m_bit_pos); 144 | ++m_bit_pos; 145 | if (m_bit_pos == 64) { 146 | m_bit_pos = 0; 147 | ++m_active_buf_filled; 148 | 149 | // If the active buffer was full, send it to I/O thread. 150 | // This function may wait a bit until the I/O thread 151 | // finishes writing the previous passive buffer. 
152 | if (m_active_buf_filled == m_items_per_buf) 153 | send_active_buf_to_write(); 154 | 155 | // Clear all bits in the current byte. 156 | m_active_buf[m_active_buf_filled] = 0; 157 | } 158 | } 159 | 160 | void stop_writing() { 161 | 162 | // Write the partially filled active buffer to disk. 163 | std::uint64_t m_bit_pos_backup = m_bit_pos; 164 | if (m_bit_pos != 0) ++m_active_buf_filled; 165 | if (m_active_buf_filled > 0L) 166 | send_active_buf_to_write(); 167 | 168 | // Let the I/O thread know that we're done. 169 | std::unique_lock lk(m_mutex); 170 | m_finished = true; 171 | lk.unlock(); 172 | m_cv.notify_one(); 173 | 174 | // Wait for the thread to finish. 175 | m_thread->join(); 176 | 177 | // Append the number of bits in the last 64-bit word to file. 178 | utils::write_to_file(&m_bit_pos_backup, 1, m_file); 179 | m_bits_written += 64; 180 | } 181 | 182 | std::uint64_t bytes_written() const { 183 | std::uint64_t words_written = (m_bits_written + 63) / 64; 184 | return (std::uint64_t)8 * words_written; 185 | } 186 | 187 | private: 188 | std::uint64_t *m_mem; 189 | std::uint64_t *m_active_buf; 190 | std::uint64_t *m_passive_buf; 191 | 192 | std::uint64_t m_items_per_buf; 193 | std::uint64_t m_bit_pos; 194 | std::uint64_t m_active_buf_filled; 195 | std::uint64_t m_passive_buf_filled; 196 | std::uint64_t m_bits_written; 197 | 198 | // Used for synchronization with the I/O thread. 
199 | bool m_avail; 200 | bool m_finished; 201 | std::mutex m_mutex; 202 | std::condition_variable m_cv; 203 | 204 | std::FILE *m_file; 205 | std::thread *m_thread; 206 | }; 207 | 208 | } // namespace fsais_private 209 | 210 | #endif // __FSAIS_SRC_IO_ASYNC_BIT_STREAM_WRITER_HPP_INCLUDED 211 | -------------------------------------------------------------------------------- /src/fsais_src/io/async_multi_bit_stream_reader.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file fsais_src/io/async_multi_bit_stream_reader.hpp 3 | * @section LICENCE 4 | * 5 | * This file is part of fSAIS v0.1.0 6 | * See: https://github.com/dominikkempa/fsais 7 | * 8 | * Copyright (C) 2016-2020 9 | * Dominik Kempa 10 | * Juha Karkkainen 11 | * 12 | * Permission is hereby granted, free of charge, to any person 13 | * obtaining a copy of this software and associated documentation 14 | * files (the "Software"), to deal in the Software without 15 | * restriction, including without limitation the rights to use, 16 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | * copies of the Software, and to permit persons to whom the 18 | * Software is furnished to do so, subject to the following 19 | * conditions: 20 | * 21 | * The above copyright notice and this permission notice shall be 22 | * included in all copies or substantial portions of the Software. 23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 26 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 28 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 29 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 31 | * OTHER DEALINGS IN THE SOFTWARE. 
32 | **/ 33 | 34 | #ifndef __FSAIS_SRC_IO_ASYNC_MULTI_BIT_STREAM_READER_HPP_INCLUDED 35 | #define __FSAIS_SRC_IO_ASYNC_MULTI_BIT_STREAM_READER_HPP_INCLUDED 36 | 37 | #include 38 | #include 39 | #include 40 | 41 | #include "async_multi_stream_reader.hpp" 42 | 43 | 44 | namespace fsais_private { 45 | 46 | class async_multi_bit_stream_reader { 47 | private: 48 | typedef async_multi_stream_reader internal_reader_type; 49 | internal_reader_type *m_internal_reader; 50 | 51 | struct bit_buffer { 52 | std::uint64_t m_data; 53 | std::uint64_t m_pos; 54 | bool m_is_filled; 55 | 56 | bit_buffer() { 57 | m_data = 0; 58 | m_pos = 0; 59 | m_is_filled = false; 60 | } 61 | }; 62 | 63 | std::vector m_buffers; 64 | 65 | public: 66 | async_multi_bit_stream_reader(std::uint64_t number_of_files, 67 | std::uint64_t bufsize_per_file_in_bytes = (1UL << 20)) { 68 | m_internal_reader = new internal_reader_type(number_of_files, bufsize_per_file_in_bytes); 69 | m_buffers = std::vector(number_of_files); 70 | } 71 | 72 | void add_file(std::string filename) { 73 | m_internal_reader->add_file(filename); 74 | } 75 | 76 | inline std::uint8_t read_from_ith_file(std::uint64_t i) { 77 | if (!m_buffers[i].m_is_filled || m_buffers[i].m_pos == 64) { 78 | m_buffers[i].m_data = m_internal_reader->read_from_ith_file(i); 79 | m_buffers[i].m_pos = 0; 80 | m_buffers[i].m_is_filled = true; 81 | } 82 | 83 | return (m_buffers[i].m_data & (1UL << (m_buffers[i].m_pos++))) > 0; 84 | } 85 | 86 | void stop_reading() { 87 | m_internal_reader->stop_reading(); 88 | } 89 | 90 | inline std::uint64_t bytes_read() const { 91 | return m_internal_reader->bytes_read(); 92 | } 93 | 94 | ~async_multi_bit_stream_reader() { 95 | delete m_internal_reader; 96 | } 97 | }; 98 | 99 | } // namespace fsais_private 100 | 101 | #endif // __FSAIS_SRC_IO_ASYNC_MULTI_BIT_STREAM_READER_HPP_INCLUDED 102 | -------------------------------------------------------------------------------- /src/fsais_src/io/async_multi_stream_reader.hpp: 
-------------------------------------------------------------------------------- 1 | /** 2 | * @file fsais_src/io/async_multi_stream_reader.hpp 3 | * @section LICENCE 4 | * 5 | * This file is part of fSAIS v0.1.0 6 | * See: https://github.com/dominikkempa/fsais 7 | * 8 | * Copyright (C) 2016-2020 9 | * Dominik Kempa 10 | * Juha Karkkainen 11 | * 12 | * Permission is hereby granted, free of charge, to any person 13 | * obtaining a copy of this software and associated documentation 14 | * files (the "Software"), to deal in the Software without 15 | * restriction, including without limitation the rights to use, 16 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | * copies of the Software, and to permit persons to whom the 18 | * Software is furnished to do so, subject to the following 19 | * conditions: 20 | * 21 | * The above copyright notice and this permission notice shall be 22 | * included in all copies or substantial portions of the Software. 23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 26 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 28 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 29 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 31 | * OTHER DEALINGS IN THE SOFTWARE. 
// Growable FIFO queue stored in a circular array. The capacity
// starts at 1 and doubles whenever the array becomes full, so
// there is always a free slot for the next push().
// T must be default-constructible and assignable.
template<typename T>
struct circular_queue {
  private:
    std::uint64_t m_size;    // capacity of m_data
    std::uint64_t m_filled;  // number of stored items
    std::uint64_t m_head;    // slot receiving the next pushed item
    std::uint64_t m_tail;    // slot holding the oldest item
    T *m_data;

    // The queue owns m_data; a memberwise copy
    // would release the same array twice.
    circular_queue(const circular_queue &) = delete;
    circular_queue &operator=(const circular_queue &) = delete;

  public:
    circular_queue()
      : m_size(1),
        m_filled(0),
        m_head(0),
        m_tail(0),
        m_data(new T[m_size]) {}

    // Appends x at the back of the queue.
    inline void push(T x) {
      m_data[m_head++] = x;
      if (m_head == m_size)
        m_head = 0;
      ++m_filled;
      if (m_filled == m_size)
        enlarge();
    }

    // Oldest item. The queue must not be empty.
    inline T &front() const {
      return m_data[m_tail];
    }

    // Removes the oldest item. The queue must not be empty.
    inline void pop() {
      ++m_tail;
      if (m_tail == m_size)
        m_tail = 0;
      --m_filled;
    }

    inline bool empty() const {
      return (m_filled == 0);
    }

    inline std::uint64_t size() const {
      return m_filled;
    }

    ~circular_queue() {
      delete[] m_data;
    }

  private:

    // Doubles the capacity. The stored items are moved, oldest
    // first, to the beginning of the new array.
    void enlarge() {
      T *new_data = new T[2 * m_size];

      // The items form up to two runs: from m_tail to the end of
      // the array, followed by the wrapped part at its beginning.
      const std::uint64_t first_run =
        std::min<std::uint64_t>(m_filled, m_size - m_tail);
      std::copy(m_data + m_tail, m_data + m_tail + first_run, new_data);
      std::copy(m_data, m_data + (m_filled - first_run),
          new_data + first_run);

      delete[] m_data;
      m_data = new_data;
      m_head = m_filled;
      m_tail = 0;
      m_size <<= 1;
    }
};
208 | lk.unlock(); 209 | break; 210 | } 211 | 212 | // Extract the buffer from the collection. 213 | request_type request = caller->m_read_requests.get(); 214 | lk.unlock(); 215 | 216 | // Process the request. 217 | request.m_buffer->read_from_file(caller->m_files[request.m_file_id]); 218 | caller->m_bytes_read += request.m_buffer->size_in_bytes(); 219 | 220 | // Update the status of the buffer 221 | // and notify the waiting thread. 222 | std::unique_lock lk2( 223 | caller->m_mutexes[request.m_file_id]); 224 | request.m_buffer->m_is_filled = true; 225 | lk2.unlock(); 226 | caller->m_cvs[request.m_file_id].notify_one(); 227 | } 228 | } 229 | 230 | private: 231 | typedef buffer buffer_type; 232 | typedef request request_type; 233 | 234 | std::uint64_t m_bytes_read; 235 | std::uint64_t m_items_per_buf; 236 | std::uint64_t n_files; 237 | std::uint64_t m_files_added; 238 | 239 | std::FILE **m_files; 240 | std::uint64_t *m_active_buffer_pos; 241 | value_type *m_mem; 242 | buffer_type **m_active_buffers; 243 | buffer_type **m_passive_buffers; 244 | std::mutex *m_mutexes; 245 | std::condition_variable *m_cvs; 246 | 247 | request_queue m_read_requests; 248 | std::thread *m_io_thread; 249 | 250 | private: 251 | void issue_read_request(std::uint64_t file_id) { 252 | request_type req(m_passive_buffers[file_id], file_id); 253 | m_read_requests.add(req); 254 | m_read_requests.m_cv.notify_one(); 255 | } 256 | 257 | void receive_new_buffer(std::uint64_t file_id) { 258 | 259 | // Wait for the I/O thread to finish reading passive buffer. 260 | std::unique_lock lk(m_mutexes[file_id]); 261 | while (m_passive_buffers[file_id]->m_is_filled == false) 262 | m_cvs[file_id].wait(lk); 263 | 264 | // Swap active and passive buffers. 265 | std::swap(m_active_buffers[file_id], m_passive_buffers[file_id]); 266 | m_active_buffer_pos[file_id] = 0; 267 | m_passive_buffers[file_id]->m_is_filled = false; 268 | lk.unlock(); 269 | 270 | // Issue the read request for the passive buffer. 
271 | issue_read_request(file_id); 272 | } 273 | 274 | public: 275 | async_multi_stream_reader( 276 | std::uint64_t number_of_files, 277 | std::uint64_t buf_size_bytes = (std::uint64_t)(1 << 20)) { 278 | 279 | // Sanity check. 280 | if (number_of_files == 0) { 281 | fprintf(stderr, "\nError in async_multi_stream_reader: " 282 | "number_of_files == 0\n"); 283 | std::exit(EXIT_FAILURE); 284 | } 285 | 286 | // Initialize basic parameters. 287 | n_files = number_of_files; 288 | m_files_added = 0; 289 | m_bytes_read = 0; 290 | 291 | // Computer optimal buffer size. 292 | buf_size_bytes = std::max((std::uint64_t)1, buf_size_bytes / 2); 293 | m_items_per_buf = utils::disk_block_size(buf_size_bytes); 294 | 295 | // Allocate arrays storing info about each file. 296 | m_mutexes = new std::mutex[n_files]; 297 | m_cvs = new std::condition_variable[n_files]; 298 | m_active_buffer_pos = new std::uint64_t[n_files]; 299 | m_files = new std::FILE*[n_files]; 300 | m_active_buffers = new buffer_type*[n_files]; 301 | m_passive_buffers = new buffer_type*[n_files]; 302 | 303 | // Allocate buffers. 304 | std::uint64_t toallocate = 2 * n_files * m_items_per_buf; 305 | m_mem = utils::allocate_array(toallocate); 306 | { 307 | value_type *mem = m_mem; 308 | for (std::uint64_t i = 0; i < n_files; ++i) { 309 | m_active_buffer_pos[i] = 0; 310 | m_active_buffers[i] = new buffer_type(m_items_per_buf, mem); 311 | mem += m_items_per_buf; 312 | m_passive_buffers[i] = new buffer_type(m_items_per_buf, mem); 313 | mem += m_items_per_buf; 314 | } 315 | } 316 | 317 | // Start the I/O thread. 318 | m_io_thread = new std::thread(async_io_thread_code, this); 319 | } 320 | 321 | // The added file gets the next available ID (starting from 0). 322 | void add_file(std::string filename) { 323 | m_files[m_files_added] = utils::file_open_nobuf(filename, "r"); 324 | issue_read_request(m_files_added); 325 | ++m_files_added; 326 | } 327 | 328 | // Read from i-th file. 
329 | value_type read_from_ith_file(std::uint64_t i) { 330 | if (m_active_buffer_pos[i] == m_active_buffers[i]->m_filled) 331 | receive_new_buffer(i); 332 | return m_active_buffers[i]->m_content[m_active_buffer_pos[i]++]; 333 | } 334 | 335 | // Return performed I/O in bytes. 336 | inline std::uint64_t bytes_read() const { 337 | return m_bytes_read; 338 | } 339 | 340 | // Stop the I/O thread, now the user can 341 | // cafely call the bytes_read() method. 342 | void stop_reading() { 343 | if (m_io_thread != NULL) { 344 | std::unique_lock lk(m_read_requests.m_mutex); 345 | m_read_requests.m_no_more_requests = true; 346 | lk.unlock(); 347 | m_read_requests.m_cv.notify_one(); 348 | m_io_thread->join(); 349 | delete m_io_thread; 350 | m_io_thread = NULL; 351 | } 352 | } 353 | 354 | // Destructor. 355 | ~async_multi_stream_reader() { 356 | stop_reading(); 357 | 358 | // Delete buffers. 359 | for (std::uint64_t i = 0; i < n_files; ++i) { 360 | std::fclose(m_files[i]); 361 | delete m_active_buffers[i]; 362 | delete m_passive_buffers[i]; 363 | } 364 | 365 | // Rest of the cleanup. 
366 | utils::deallocate(m_mem); 367 | delete[] m_active_buffers; 368 | delete[] m_passive_buffers; 369 | delete[] m_mutexes; 370 | delete[] m_cvs; 371 | delete[] m_active_buffer_pos; 372 | delete[] m_files; 373 | } 374 | }; 375 | 376 | } // namespace fsais_private 377 | 378 | #endif // __FSAIS_SRC_IO_ASYNC_MULTI_STREAM_READER_HPP_INCLUDED 379 | -------------------------------------------------------------------------------- /src/fsais_src/io/async_multi_stream_reader_multipart.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file fsais_src/io/async_multi_stream_reader_multipart.hpp 3 | * @section LICENCE 4 | * 5 | * This file is part of fSAIS v0.1.0 6 | * See: https://github.com/dominikkempa/fsais 7 | * 8 | * Copyright (C) 2016-2020 9 | * Dominik Kempa 10 | * Juha Karkkainen 11 | * 12 | * Permission is hereby granted, free of charge, to any person 13 | * obtaining a copy of this software and associated documentation 14 | * files (the "Software"), to deal in the Software without 15 | * restriction, including without limitation the rights to use, 16 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | * copies of the Software, and to permit persons to whom the 18 | * Software is furnished to do so, subject to the following 19 | * conditions: 20 | * 21 | * The above copyright notice and this permission notice shall be 22 | * included in all copies or substantial portions of the Software. 23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 26 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 28 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 29 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 31 | * OTHER DEALINGS IN THE SOFTWARE. 32 | **/ 33 | 34 | #ifndef __FSAIS_SRC_IO_ASYNC_MULTI_STREAM_READER_MULTIPART_HPP_INCLUDED 35 | #define __FSAIS_SRC_IO_ASYNC_MULTI_STREAM_READER_MULTIPART_HPP_INCLUDED 36 | 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | 46 | #include "../utils.hpp" 47 | 48 | 49 | namespace fsais_private { 50 | 51 | template 52 | class async_multi_stream_reader_multipart { 53 | private: 54 | template 55 | struct buffer { 56 | buffer(std::uint64_t size, T* const mem) 57 | : m_content(mem), m_size(size) { 58 | m_filled = 0; 59 | m_is_filled = false; 60 | } 61 | 62 | void read_from_file(std::FILE *f) { 63 | utils::read_from_file(m_content, m_size, m_filled, f); 64 | } 65 | 66 | inline std::uint64_t size_in_bytes() const { 67 | return sizeof(T) * m_filled; 68 | } 69 | 70 | inline bool empty() const { 71 | return (m_filled == 0); 72 | } 73 | 74 | T* const m_content; 75 | const std::uint64_t m_size; 76 | 77 | std::uint64_t m_filled; 78 | bool m_is_filled; 79 | }; 80 | 81 | template 82 | struct circular_queue { 83 | private: 84 | std::uint64_t m_size; 85 | std::uint64_t m_filled; 86 | std::uint64_t m_head; 87 | std::uint64_t m_tail; 88 | T *m_data; 89 | 90 | public: 91 | circular_queue() 92 | : m_size(1), 93 | m_filled(0), 94 | m_head(0), 95 | m_tail(0), 96 | m_data(new T[m_size]) {} 97 | 98 | inline void push(T x) { 99 | m_data[m_head++] = x; 100 | if (m_head == m_size) 101 | m_head = 0; 102 | ++m_filled; 103 | if (m_filled == m_size) 104 | enlarge(); 105 | } 106 | 107 | inline T &front() const { 108 | return m_data[m_tail]; 109 | } 110 | 111 | inline void pop() { 112 | ++m_tail; 113 | if (m_tail == m_size) 114 | m_tail = 0; 115 | 
--m_filled; 116 | } 117 | 118 | inline bool empty() const { 119 | return (m_filled == 0); 120 | } 121 | 122 | inline std::uint64_t size() const { 123 | return m_filled; 124 | } 125 | 126 | ~circular_queue() { 127 | delete[] m_data; 128 | } 129 | 130 | private: 131 | void enlarge() { 132 | T *new_data = new T[2 * m_size]; 133 | std::uint64_t left = m_filled; 134 | m_filled = 0; 135 | 136 | while (left > 0) { 137 | std::uint64_t tocopy = std::min(left, m_size - m_tail); 138 | std::copy(m_data + m_tail, 139 | m_data + m_tail + tocopy, new_data + m_filled); 140 | 141 | m_tail += tocopy; 142 | if (m_tail == m_size) 143 | m_tail = 0; 144 | left -= tocopy; 145 | m_filled += tocopy; 146 | } 147 | 148 | m_head = m_filled; 149 | m_tail = 0; 150 | m_size <<= 1; 151 | std::swap(m_data, new_data); 152 | delete[] new_data; 153 | } 154 | }; 155 | 156 | template 157 | struct request { 158 | request() {} 159 | request(buffer_type *buffer, std::uint64_t file_id) { 160 | m_buffer = buffer; 161 | m_file_id = file_id; 162 | } 163 | 164 | buffer_type *m_buffer; 165 | std::uint64_t m_file_id; 166 | }; 167 | 168 | template 169 | struct request_queue { 170 | request_queue() 171 | : m_no_more_requests(false) {} 172 | 173 | request_type get() { 174 | request_type ret = m_requests.front(); 175 | m_requests.pop(); 176 | return ret; 177 | } 178 | 179 | inline void add(request_type request) { 180 | std::lock_guard lk(m_mutex); 181 | m_requests.push(request); 182 | } 183 | 184 | inline bool empty() const { 185 | return m_requests.empty(); 186 | } 187 | 188 | circular_queue m_requests; // Must have FIFO property 189 | std::condition_variable m_cv; 190 | std::mutex m_mutex; 191 | bool m_no_more_requests; 192 | }; 193 | 194 | private: 195 | template 196 | static void async_io_thread_code( 197 | async_multi_stream_reader_multipart *caller) { 198 | typedef buffer buffer_type; 199 | typedef request request_type; 200 | while (true) { 201 | 202 | // Wait for request or until 'no more requests' flag is 
set. 203 | std::unique_lock lk(caller->m_read_requests.m_mutex); 204 | while (caller->m_read_requests.empty() && 205 | !(caller->m_read_requests.m_no_more_requests)) 206 | caller->m_read_requests.m_cv.wait(lk); 207 | 208 | if (caller->m_read_requests.empty() && 209 | caller->m_read_requests.m_no_more_requests) { 210 | 211 | // No more requests -- exit. 212 | lk.unlock(); 213 | break; 214 | } 215 | 216 | // Extract the buffer from the collection. 217 | request_type request = caller->m_read_requests.get(); 218 | lk.unlock(); 219 | 220 | // Process the request. 221 | if (caller->m_files[request.m_file_id] == NULL) { 222 | // Attempt to open and read from the file. 223 | std::string cur_part_filename = caller->m_filenames[request.m_file_id] + 224 | ".multipart_file.part" + utils::intToStr(caller->m_cur_part[request.m_file_id]); 225 | if (utils::file_exists(cur_part_filename)) { 226 | caller->m_files[request.m_file_id] = utils::file_open(cur_part_filename, "r"); 227 | request.m_buffer->read_from_file(caller->m_files[request.m_file_id]); 228 | } else request.m_buffer->m_filled = 0; 229 | } else { 230 | request.m_buffer->read_from_file(caller->m_files[request.m_file_id]); 231 | if (request.m_buffer->empty()) { 232 | // Close and delete current file. 233 | std::fclose(caller->m_files[request.m_file_id]); 234 | caller->m_files[request.m_file_id] = NULL; 235 | std::string cur_part_filename = caller->m_filenames[request.m_file_id] + 236 | ".multipart_file.part" + utils::intToStr(caller->m_cur_part[request.m_file_id]); 237 | utils::file_delete(cur_part_filename); 238 | 239 | // Attempt to read from the next file. 
240 | ++caller->m_cur_part[request.m_file_id]; 241 | cur_part_filename = caller->m_filenames[request.m_file_id] + 242 | ".multipart_file.part" + utils::intToStr(caller->m_cur_part[request.m_file_id]); 243 | if (utils::file_exists(cur_part_filename)) { 244 | caller->m_files[request.m_file_id] = utils::file_open(cur_part_filename, "r"); 245 | request.m_buffer->read_from_file(caller->m_files[request.m_file_id]); 246 | } else request.m_buffer->m_filled = 0; 247 | } 248 | } 249 | caller->m_bytes_read += request.m_buffer->size_in_bytes(); 250 | 251 | // Update the status of the buffer 252 | // and notify the waiting thread. 253 | std::unique_lock lk2( 254 | caller->m_mutexes[request.m_file_id]); 255 | request.m_buffer->m_is_filled = true; 256 | lk2.unlock(); 257 | caller->m_cvs[request.m_file_id].notify_one(); 258 | } 259 | } 260 | 261 | private: 262 | typedef buffer buffer_type; 263 | typedef request request_type; 264 | 265 | std::uint64_t m_bytes_read; 266 | std::uint64_t m_items_per_buf; 267 | std::uint64_t n_files; 268 | std::uint64_t m_files_added; 269 | 270 | std::FILE **m_files; 271 | std::string *m_filenames; 272 | std::uint64_t *m_cur_part; 273 | 274 | std::uint64_t *m_active_buffer_pos; 275 | value_type *m_mem; 276 | buffer_type **m_active_buffers; 277 | buffer_type **m_passive_buffers; 278 | std::mutex *m_mutexes; 279 | std::condition_variable *m_cvs; 280 | 281 | request_queue m_read_requests; 282 | std::thread *m_io_thread; 283 | 284 | private: 285 | void issue_read_request(std::uint64_t file_id) { 286 | request_type req(m_passive_buffers[file_id], file_id); 287 | m_read_requests.add(req); 288 | m_read_requests.m_cv.notify_one(); 289 | } 290 | 291 | void receive_new_buffer(std::uint64_t file_id) { 292 | 293 | // Wait for the I/O thread to finish reading passive buffer. 294 | std::unique_lock lk(m_mutexes[file_id]); 295 | while (m_passive_buffers[file_id]->m_is_filled == false) 296 | m_cvs[file_id].wait(lk); 297 | 298 | // Swap active and passive buffers. 
299 | std::swap(m_active_buffers[file_id], m_passive_buffers[file_id]); 300 | m_active_buffer_pos[file_id] = 0; 301 | m_passive_buffers[file_id]->m_is_filled = false; 302 | lk.unlock(); 303 | 304 | // Issue the read request for the passive buffer. 305 | issue_read_request(file_id); 306 | } 307 | 308 | public: 309 | async_multi_stream_reader_multipart( 310 | std::uint64_t number_of_files, 311 | std::uint64_t buf_size_bytes = (std::uint64_t)(1 << 19)) { 312 | 313 | // Sanity check. 314 | if (number_of_files == 0) { 315 | fprintf(stderr, "\nError in async_multi_stream_reader_multipart: " 316 | "number_of_files == 0\n"); 317 | std::exit(EXIT_FAILURE); 318 | } 319 | 320 | // Initialize basic parameters. 321 | n_files = number_of_files; 322 | m_files_added = 0; 323 | m_bytes_read = 0; 324 | 325 | // Computer optimal buffer size. 326 | buf_size_bytes = std::max((std::uint64_t)1, buf_size_bytes / 2); 327 | m_items_per_buf = utils::disk_block_size(buf_size_bytes); 328 | 329 | // Allocate arrays storing info about each file. 330 | m_mutexes = new std::mutex[n_files]; 331 | m_cvs = new std::condition_variable[n_files]; 332 | m_active_buffer_pos = new std::uint64_t[n_files]; 333 | m_files = new std::FILE*[n_files]; 334 | m_filenames = new std::string[n_files]; 335 | m_cur_part = new std::uint64_t[n_files]; 336 | m_active_buffers = new buffer_type*[n_files]; 337 | m_passive_buffers = new buffer_type*[n_files]; 338 | 339 | // Allocate buffers. 340 | std::uint64_t toallocate = 2 * n_files * m_items_per_buf; 341 | m_mem = utils::allocate_array(toallocate); 342 | { 343 | value_type *mem = m_mem; 344 | for (std::uint64_t i = 0; i < n_files; ++i) { 345 | m_active_buffer_pos[i] = 0; 346 | m_active_buffers[i] = new buffer_type(m_items_per_buf, mem); 347 | mem += m_items_per_buf; 348 | m_passive_buffers[i] = new buffer_type(m_items_per_buf, mem); 349 | mem += m_items_per_buf; 350 | } 351 | } 352 | 353 | // Start the I/O thread. 
354 | m_io_thread = new std::thread(async_io_thread_code, this); 355 | } 356 | 357 | // The added file gets the next available ID (starting from 0). 358 | void add_file(std::string filename) { 359 | m_filenames[m_files_added] = filename; 360 | m_files[m_files_added] = NULL; 361 | m_cur_part[m_files_added] = 0; 362 | issue_read_request(m_files_added); 363 | ++m_files_added; 364 | } 365 | 366 | // Read from i-th file. 367 | value_type read_from_ith_file(std::uint64_t i) { 368 | if (m_active_buffer_pos[i] == m_active_buffers[i]->m_filled) 369 | receive_new_buffer(i); 370 | return m_active_buffers[i]->m_content[m_active_buffer_pos[i]++]; 371 | } 372 | 373 | // Return performed I/O in bytes. 374 | inline std::uint64_t bytes_read() const { 375 | return m_bytes_read; 376 | } 377 | 378 | // Stop the I/O thread, now the user can 379 | // cafely call the bytes_read() method. 380 | void stop_reading() { 381 | if (m_io_thread != NULL) { 382 | std::unique_lock lk(m_read_requests.m_mutex); 383 | m_read_requests.m_no_more_requests = true; 384 | lk.unlock(); 385 | m_read_requests.m_cv.notify_one(); 386 | m_io_thread->join(); 387 | delete m_io_thread; 388 | m_io_thread = NULL; 389 | } 390 | } 391 | 392 | // Destructor. 393 | ~async_multi_stream_reader_multipart() { 394 | stop_reading(); 395 | 396 | // Delete buffers. 397 | for (std::uint64_t i = 0; i < n_files; ++i) { 398 | delete m_active_buffers[i]; 399 | delete m_passive_buffers[i]; 400 | } 401 | 402 | // Rest of the cleanup. 
403 | utils::deallocate(m_mem); 404 | delete[] m_active_buffers; 405 | delete[] m_passive_buffers; 406 | delete[] m_mutexes; 407 | delete[] m_cvs; 408 | delete[] m_active_buffer_pos; 409 | delete[] m_files; 410 | delete[] m_filenames; 411 | delete[] m_cur_part; 412 | } 413 | }; 414 | 415 | } // namespace fsais_private 416 | 417 | #endif // __FSAIS_SRC_IO_ASYNC_MULTI_STREAM_READER_MULTIPART_HPP_INCLUDED 418 | -------------------------------------------------------------------------------- /src/fsais_src/io/async_multi_stream_writer.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file fsais_src/io/async_multi_stream_writer.hpp 3 | * @section LICENCE 4 | * 5 | * This file is part of fSAIS v0.1.0 6 | * See: https://github.com/dominikkempa/fsais 7 | * 8 | * Copyright (C) 2016-2020 9 | * Dominik Kempa 10 | * Juha Karkkainen 11 | * 12 | * Permission is hereby granted, free of charge, to any person 13 | * obtaining a copy of this software and associated documentation 14 | * files (the "Software"), to deal in the Software without 15 | * restriction, including without limitation the rights to use, 16 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | * copies of the Software, and to permit persons to whom the 18 | * Software is furnished to do so, subject to the following 19 | * conditions: 20 | * 21 | * The above copyright notice and this permission notice shall be 22 | * included in all copies or substantial portions of the Software. 23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 26 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 28 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 29 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 31 | * OTHER DEALINGS IN THE SOFTWARE. 32 | **/ 33 | 34 | #ifndef __FSAIS_SRC_IO_ASYNC_MULTI_STREAM_WRITER_HPP_INCLUDED 35 | #define __FSAIS_SRC_IO_ASYNC_MULTI_STREAM_WRITER_HPP_INCLUDED 36 | 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | 47 | #include "../utils.hpp" 48 | 49 | 50 | namespace fsais_private { 51 | 52 | template 53 | class async_multi_stream_writer { 54 | private: 55 | template 56 | struct buffer { 57 | buffer(std::uint64_t size, T* const mem) 58 | : m_content(mem), m_size(size) { 59 | m_filled = 0; 60 | } 61 | 62 | void flush_to_file(std::FILE *f) { 63 | utils::write_to_file(m_content, m_filled, f); 64 | m_filled = 0; 65 | } 66 | 67 | inline bool empty() const { 68 | return (m_filled == 0); 69 | } 70 | 71 | inline bool full() const { 72 | return (m_filled == m_size); 73 | } 74 | 75 | inline std::uint64_t size_in_bytes() const { 76 | return sizeof(T) * m_filled; 77 | } 78 | 79 | inline std::uint64_t free_space() const { 80 | return m_size - m_filled; 81 | } 82 | 83 | T* const m_content; 84 | const std::uint64_t m_size; 85 | 86 | std::uint64_t m_filled; 87 | }; 88 | 89 | template 90 | struct circular_queue { 91 | private: 92 | std::uint64_t m_size; 93 | std::uint64_t m_filled; 94 | std::uint64_t m_head; 95 | std::uint64_t m_tail; 96 | T *m_data; 97 | 98 | public: 99 | circular_queue() 100 | : m_size(1), 101 | m_filled(0), 102 | m_head(0), 103 | m_tail(0), 104 | m_data(new T[m_size]) {} 105 | 106 | inline void push(T x) { 107 | m_data[m_head++] = x; 108 | if (m_head == m_size) 109 | m_head = 0; 110 | ++m_filled; 111 | if (m_filled == m_size) 112 | enlarge(); 113 | } 114 | 115 | inline T &front() const { 116 | return 
m_data[m_tail]; 117 | } 118 | 119 | inline void pop() { 120 | ++m_tail; 121 | if (m_tail == m_size) 122 | m_tail = 0; 123 | --m_filled; 124 | } 125 | 126 | inline bool empty() const { 127 | return (m_filled == 0); 128 | } 129 | 130 | inline std::uint64_t size() const { 131 | return m_filled; 132 | } 133 | 134 | ~circular_queue() { 135 | delete[] m_data; 136 | } 137 | 138 | private: 139 | void enlarge() { 140 | T *new_data = new T[2 * m_size]; 141 | std::uint64_t left = m_filled; 142 | m_filled = 0; 143 | 144 | while (left > 0) { 145 | std::uint64_t tocopy = std::min(left, m_size - m_tail); 146 | std::copy(m_data + m_tail, 147 | m_data + m_tail + tocopy, new_data + m_filled); 148 | 149 | m_tail += tocopy; 150 | if (m_tail == m_size) 151 | m_tail = 0; 152 | left -= tocopy; 153 | m_filled += tocopy; 154 | } 155 | 156 | m_head = m_filled; 157 | m_tail = 0; 158 | m_size <<= 1; 159 | std::swap(m_data, new_data); 160 | delete[] new_data; 161 | } 162 | }; 163 | 164 | template 165 | struct request { 166 | request() {} 167 | request(buffer_type *buffer, std::uint64_t file_id) { 168 | m_buffer = buffer; 169 | m_file_id = file_id; 170 | } 171 | 172 | buffer_type *m_buffer; 173 | std::uint64_t m_file_id; 174 | }; 175 | 176 | template 177 | struct request_queue { 178 | request_queue() 179 | : m_no_more_requests(false) {} 180 | 181 | request_type get() { 182 | request_type ret = m_requests.front(); 183 | m_requests.pop(); 184 | return ret; 185 | } 186 | 187 | inline void add(request_type request) { 188 | std::lock_guard lk(m_mutex); 189 | m_requests.push(request); 190 | } 191 | 192 | inline bool empty() const { 193 | return m_requests.empty(); 194 | } 195 | 196 | circular_queue m_requests; // Must have FIFO property 197 | std::condition_variable m_cv; 198 | std::mutex m_mutex; 199 | bool m_no_more_requests; 200 | }; 201 | 202 | template 203 | struct buffer_collection { 204 | 205 | // Separate method to allow locking. 
206 | inline void add(buffer_type *buffer) { 207 | std::lock_guard lk(m_mutex); 208 | m_buffers.push_back(buffer); 209 | } 210 | 211 | buffer_type* get() { 212 | buffer_type *ret = m_buffers.back(); 213 | m_buffers.pop_back(); 214 | return ret; 215 | } 216 | 217 | inline bool empty() const { 218 | return m_buffers.empty(); 219 | } 220 | 221 | std::vector m_buffers; 222 | std::condition_variable m_cv; 223 | std::mutex m_mutex; 224 | }; 225 | 226 | private: 227 | template 228 | static void async_io_thread_code(async_multi_stream_writer *caller) { 229 | typedef buffer buffer_type; 230 | typedef request request_type; 231 | while (true) { 232 | 233 | // Wait for request or until 'no more requests' flag is set. 234 | std::unique_lock lk(caller->m_write_requests.m_mutex); 235 | while (caller->m_write_requests.empty() && 236 | !(caller->m_write_requests.m_no_more_requests)) 237 | caller->m_write_requests.m_cv.wait(lk); 238 | 239 | if (caller->m_write_requests.empty() && 240 | caller->m_write_requests.m_no_more_requests) { 241 | 242 | // No more requests -- exit. 243 | lk.unlock(); 244 | break; 245 | } 246 | 247 | // Extract the buffer from the collection. 248 | request_type request = caller->m_write_requests.get(); 249 | lk.unlock(); 250 | 251 | // Process the request. 252 | request.m_buffer->flush_to_file(caller->m_files[request.m_file_id]); 253 | 254 | // Add the (now empty) buffer to the collection 255 | // of empty buffers and notify the waiting thread. 
256 | caller->m_empty_buffers.add(request.m_buffer); 257 | caller->m_empty_buffers.m_cv.notify_one(); 258 | } 259 | } 260 | 261 | private: 262 | typedef buffer buffer_type; 263 | typedef request request_type; 264 | 265 | std::uint64_t m_bytes_written; 266 | std::uint64_t m_items_per_buf; 267 | 268 | value_type *m_mem; 269 | value_type *m_mem_ptr; 270 | std::vector m_files; 271 | std::vector m_buffers; 272 | buffer_collection m_empty_buffers; 273 | request_queue m_write_requests; 274 | std::thread *m_io_thread; 275 | 276 | // Issue a request to write to buffer. 277 | void issue_write_request(std::uint64_t file_id) { 278 | request_type req(m_buffers[file_id], file_id); 279 | m_buffers[file_id] = NULL; 280 | m_write_requests.add(req); 281 | m_write_requests.m_cv.notify_one(); 282 | } 283 | 284 | // Get a free buffer from the collection of free buffers. 285 | buffer_type* get_empty_buffer() { 286 | std::unique_lock lk(m_empty_buffers.m_mutex); 287 | while (m_empty_buffers.empty()) 288 | m_empty_buffers.m_cv.wait(lk); 289 | buffer_type *ret = m_empty_buffers.get(); 290 | lk.unlock(); 291 | return ret; 292 | } 293 | 294 | public: 295 | async_multi_stream_writer( 296 | std::uint64_t n_files, 297 | std::uint64_t buf_size_bytes = (std::uint64_t)(1 << 20), 298 | std::uint64_t n_empty_buffers = (std::uint64_t)4) { 299 | 300 | // Sanity check. 301 | if (n_files == 0) { 302 | fprintf(stderr, "\nError in async_multi_stream_writer: " 303 | "n_files == 0\n"); 304 | std::exit(EXIT_FAILURE); 305 | } 306 | 307 | // Initialize basic parameters. 308 | // Works even with n_empty_buffers == 0. 309 | m_bytes_written = 0; 310 | m_items_per_buf = utils::disk_block_size(buf_size_bytes); 311 | 312 | // Allocate buffers. 
313 | std::uint64_t n_bufs = n_empty_buffers + n_files; 314 | m_mem = utils::allocate_array(n_bufs * m_items_per_buf); 315 | m_mem_ptr = m_mem; 316 | for (std::uint64_t j = 0; j < n_empty_buffers; ++j) { 317 | m_empty_buffers.add(new buffer_type(m_items_per_buf, m_mem_ptr)); 318 | m_mem_ptr += m_items_per_buf; 319 | } 320 | 321 | // Start the I/O thread. 322 | m_io_thread = new std::thread(async_io_thread_code, this); 323 | } 324 | 325 | // The added file gets the next available ID (starting from 0). 326 | void add_file(std::string filename, std::string write_mode = 327 | std::string("w")) { 328 | m_buffers.push_back(new buffer_type(m_items_per_buf, m_mem_ptr)); 329 | m_mem_ptr += m_items_per_buf; 330 | m_files.push_back(utils::file_open_nobuf(filename, write_mode)); 331 | } 332 | 333 | // Write value to i-th file. 334 | inline void write_to_ith_file(std::uint64_t i, value_type value) { 335 | 336 | // We count I/O volume here (and not in the thread doing I/O) to 337 | // avoid the situation, where user call bytes_written(), but the 338 | // I/O thread is still writing the last buffer. 339 | m_bytes_written += sizeof(value_type); 340 | m_buffers[i]->m_content[m_buffers[i]->m_filled++] = value; 341 | if (m_buffers[i]->full()) { 342 | issue_write_request(i); 343 | m_buffers[i] = get_empty_buffer(); 344 | } 345 | } 346 | 347 | // Write values[0..length) to i-th file. 
348 | inline void write_to_ith_file(std::uint64_t i, 349 | const value_type *values, std::uint64_t length) { 350 | m_bytes_written += length * sizeof(value_type); 351 | while (length > 0) { 352 | std::uint64_t tocopy = std::min(length, m_buffers[i]->free_space()); 353 | std::copy(values, values + tocopy, 354 | m_buffers[i]->m_content + m_buffers[i]->m_filled); 355 | m_buffers[i]->m_filled += tocopy; 356 | values += tocopy; 357 | length -= tocopy; 358 | if (m_buffers[i]->full()) { 359 | issue_write_request(i); 360 | m_buffers[i] = get_empty_buffer(); 361 | } 362 | } 363 | } 364 | 365 | // Return performed I/O in bytes. 366 | inline std::uint64_t bytes_written() const { 367 | return m_bytes_written; 368 | } 369 | 370 | // Destructor. 371 | ~async_multi_stream_writer() { 372 | 373 | // Flush all buffers. 374 | std::uint64_t n_buffers = m_buffers.size(); 375 | for (std::uint64_t file_id = 0; file_id < n_buffers; ++file_id) { 376 | if (!(m_buffers[file_id]->empty())) 377 | issue_write_request(file_id); 378 | } 379 | 380 | // Let the I/O thread know that there 381 | // won't be any more requests. 382 | std::unique_lock lk(m_write_requests.m_mutex); 383 | m_write_requests.m_no_more_requests = true; 384 | lk.unlock(); 385 | m_write_requests.m_cv.notify_one(); 386 | 387 | // Wait for the I/O thread to finish. 388 | m_io_thread->join(); 389 | delete m_io_thread; 390 | 391 | // Delete buffers and close files. 392 | for (std::uint64_t file_id = 0; file_id < n_buffers; ++file_id) { 393 | delete m_buffers[file_id]; // Can be NULL 394 | std::fclose(m_files[file_id]); 395 | } 396 | 397 | // Delete empty buffers. 
398 | while (!(m_empty_buffers.empty())) { 399 | buffer_type *buf = m_empty_buffers.get(); 400 | delete buf; 401 | } 402 | 403 | utils::deallocate(m_mem); 404 | } 405 | }; 406 | 407 | } // namespace fsais_private 408 | 409 | #endif // __FSAIS_SRC_IO_ASYNC_MULTI_STREAM_WRITER_HPP_INCLUDED 410 | -------------------------------------------------------------------------------- /src/fsais_src/io/async_stream_reader.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file fsais_src/io/async_stream_reader.hpp 3 | * @section LICENCE 4 | * 5 | * This file is part of fSAIS v0.1.0 6 | * See: https://github.com/dominikkempa/fsais 7 | * 8 | * Copyright (C) 2016-2020 9 | * Dominik Kempa 10 | * Juha Karkkainen 11 | * 12 | * Permission is hereby granted, free of charge, to any person 13 | * obtaining a copy of this software and associated documentation 14 | * files (the "Software"), to deal in the Software without 15 | * restriction, including without limitation the rights to use, 16 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | * copies of the Software, and to permit persons to whom the 18 | * Software is furnished to do so, subject to the following 19 | * conditions: 20 | * 21 | * The above copyright notice and this permission notice shall be 22 | * included in all copies or substantial portions of the Software. 23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 26 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 28 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 29 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 31 | * OTHER DEALINGS IN THE SOFTWARE. 
32 | **/ 33 | 34 | #ifndef __FSAIS_SRC_IO_ASYNC_STREAM_READER_HPP_INCLUDED 35 | #define __FSAIS_SRC_IO_ASYNC_STREAM_READER_HPP_INCLUDED 36 | 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | 46 | #include "../utils.hpp" 47 | 48 | 49 | namespace fsais_private { 50 | 51 | template 52 | class async_stream_reader { 53 | private: 54 | template 55 | struct buffer { 56 | buffer(std::uint64_t size, T* const mem) 57 | : m_content(mem), m_size(size) { 58 | m_filled = 0; 59 | } 60 | 61 | void read_from_file(std::FILE *f) { 62 | utils::read_from_file(m_content, m_size, m_filled, f); 63 | } 64 | 65 | inline std::uint64_t size_in_bytes() const { 66 | return sizeof(T) * m_filled; 67 | } 68 | 69 | inline bool empty() const { 70 | return (m_filled == 0); 71 | } 72 | 73 | inline bool full() const { 74 | return (m_filled == m_size); 75 | } 76 | 77 | T* const m_content; 78 | const std::uint64_t m_size; 79 | 80 | std::uint64_t m_filled; 81 | }; 82 | 83 | template 84 | struct circular_queue { 85 | private: 86 | std::uint64_t m_size; 87 | std::uint64_t m_filled; 88 | std::uint64_t m_head; 89 | std::uint64_t m_tail; 90 | T *m_data; 91 | 92 | public: 93 | circular_queue() 94 | : m_size(1), 95 | m_filled(0), 96 | m_head(0), 97 | m_tail(0), 98 | m_data(new T[m_size]) {} 99 | 100 | inline void push(T x) { 101 | m_data[m_head++] = x; 102 | if (m_head == m_size) 103 | m_head = 0; 104 | ++m_filled; 105 | if (m_filled == m_size) 106 | enlarge(); 107 | } 108 | 109 | inline T &front() const { 110 | return m_data[m_tail]; 111 | } 112 | 113 | inline void pop() { 114 | ++m_tail; 115 | if (m_tail == m_size) 116 | m_tail = 0; 117 | --m_filled; 118 | } 119 | 120 | inline bool empty() const { 121 | return (m_filled == 0); 122 | } 123 | 124 | inline std::uint64_t size() const { 125 | return m_filled; 126 | } 127 | 128 | ~circular_queue() { 129 | delete[] m_data; 130 | } 131 | 132 | private: 133 | void enlarge() { 134 | T *new_data = new T[2 
* m_size]; 135 | std::uint64_t left = m_filled; 136 | m_filled = 0; 137 | 138 | while (left > 0) { 139 | std::uint64_t tocopy = std::min(left, m_size - m_tail); 140 | std::copy(m_data + m_tail, 141 | m_data + m_tail + tocopy, new_data + m_filled); 142 | 143 | m_tail += tocopy; 144 | if (m_tail == m_size) 145 | m_tail = 0; 146 | left -= tocopy; 147 | m_filled += tocopy; 148 | } 149 | 150 | m_head = m_filled; 151 | m_tail = 0; 152 | m_size <<= 1; 153 | std::swap(m_data, new_data); 154 | delete[] new_data; 155 | } 156 | }; 157 | 158 | template 159 | struct buffer_queue { 160 | typedef buffer buffer_type; 161 | 162 | buffer_queue( 163 | std::uint64_t n_buffers, 164 | std::uint64_t items_per_buf, 165 | T *mem) { 166 | m_signal_stop = false; 167 | for (std::uint64_t i = 0; i < n_buffers; ++i) { 168 | m_queue.push(new buffer_type(items_per_buf, mem)); 169 | mem += items_per_buf; 170 | } 171 | } 172 | 173 | ~buffer_queue() { 174 | while (!m_queue.empty()) { 175 | buffer_type *buf = m_queue.front(); 176 | m_queue.pop(); 177 | delete buf; 178 | } 179 | } 180 | 181 | buffer_type *pop() { 182 | buffer_type *ret = m_queue.front(); 183 | m_queue.pop(); 184 | return ret; 185 | } 186 | 187 | void push(buffer_type *buf) { 188 | std::lock_guard lk(m_mutex); 189 | m_queue.push(buf); 190 | } 191 | 192 | void send_stop_signal() { 193 | std::lock_guard lk(m_mutex); 194 | m_signal_stop = true; 195 | } 196 | 197 | inline bool empty() const { 198 | return m_queue.empty(); 199 | } 200 | 201 | circular_queue m_queue; // Must have FIFO property 202 | std::condition_variable m_cv; 203 | std::mutex m_mutex; 204 | bool m_signal_stop; 205 | }; 206 | 207 | private: 208 | typedef buffer buffer_type; 209 | typedef buffer_queue buffer_queue_type; 210 | 211 | buffer_queue_type *m_empty_buffers; 212 | buffer_queue_type *m_full_buffers; 213 | 214 | private: 215 | template 216 | static void io_thread_code(async_stream_reader *caller) { 217 | typedef buffer buffer_type; 218 | while (true) { 219 | 220 | // 
Wait for an empty buffer (or a stop signal). 221 | std::unique_lock lk(caller->m_empty_buffers->m_mutex); 222 | while (caller->m_empty_buffers->empty() && 223 | !(caller->m_empty_buffers->m_signal_stop)) 224 | caller->m_empty_buffers->m_cv.wait(lk); 225 | 226 | if (caller->m_empty_buffers->empty()) { 227 | 228 | // We received the stop signal -- exit. 229 | lk.unlock(); 230 | break; 231 | } 232 | 233 | // Extract the buffer from the queue. 234 | buffer_type *buffer = caller->m_empty_buffers->pop(); 235 | lk.unlock(); 236 | 237 | // Read the data from disk. 238 | buffer->read_from_file(caller->m_file); 239 | caller->m_bytes_read += buffer->size_in_bytes(); 240 | 241 | // Check if we reached the end of file. 242 | bool end_of_file = false; 243 | if (!buffer->full()) 244 | end_of_file = true; 245 | 246 | if (!buffer->empty()) { 247 | 248 | // Add the buffer to the queue of filled buffers. 249 | caller->m_full_buffers->push(buffer); 250 | caller->m_full_buffers->m_cv.notify_one(); 251 | } else { 252 | 253 | // Reinsert into the queue of empty buffers. 254 | caller->m_empty_buffers->push(buffer); 255 | } 256 | 257 | // If we reached the end of file -- exit. 258 | if (end_of_file == true) { 259 | caller->m_full_buffers->send_stop_signal(); 260 | caller->m_full_buffers->m_cv.notify_one(); 261 | break; 262 | } 263 | } 264 | } 265 | 266 | public: 267 | void receive_new_buffer() { 268 | 269 | // Push the current buffer back to the poll of empty buffers. 270 | if (m_cur_buffer != NULL) { 271 | m_empty_buffers->push(m_cur_buffer); 272 | m_empty_buffers->m_cv.notify_one(); 273 | m_cur_buffer = NULL; 274 | } 275 | 276 | // Extract a filled buffer. 
277 | std::unique_lock lk(m_full_buffers->m_mutex); 278 | while (m_full_buffers->empty() && !(m_full_buffers->m_signal_stop)) 279 | m_full_buffers->m_cv.wait(lk); 280 | m_cur_buffer_pos = 0; 281 | if (m_full_buffers->empty()) { 282 | lk.unlock(); 283 | m_cur_buffer_filled = 0; 284 | } else { 285 | m_cur_buffer = m_full_buffers->pop(); 286 | lk.unlock(); 287 | m_cur_buffer_filled = m_cur_buffer->m_filled; 288 | } 289 | } 290 | 291 | private: 292 | std::FILE *m_file; 293 | std::uint64_t m_bytes_read; 294 | std::uint64_t m_cur_buffer_pos; 295 | std::uint64_t m_cur_buffer_filled; 296 | 297 | value_type *m_mem; 298 | buffer_type *m_cur_buffer; 299 | std::thread *m_io_thread; 300 | 301 | public: 302 | 303 | // Default constructor, reads from stdin. 304 | async_stream_reader() { 305 | init("", (8UL << 20), 4UL, 0UL); 306 | } 307 | 308 | // Constructor, default buffer sizes, no skip. 309 | async_stream_reader(std::string filename) { 310 | init(filename, (8UL << 20), 4UL, 0UL); 311 | } 312 | 313 | // Constructor, default buffer sizes, given skip. 314 | async_stream_reader(std::string filename, 315 | std::uint64_t n_skip_items) { 316 | init(filename, (8UL << 20), 4UL, n_skip_items); 317 | } 318 | 319 | // Constructor, no skip, given buffer sizes. 320 | async_stream_reader(std::string filename, 321 | std::uint64_t total_buf_size_bytes, 322 | std::uint64_t n_buffers) { 323 | init(filename, total_buf_size_bytes, n_buffers, 0UL); 324 | } 325 | 326 | // Constructor, given buffer sizes and skip. 327 | async_stream_reader(std::string filename, 328 | std::uint64_t total_buf_size_bytes, 329 | std::uint64_t n_buffers, 330 | std::uint64_t n_skip_items) { 331 | init(filename, total_buf_size_bytes, n_buffers, n_skip_items); 332 | } 333 | 334 | // Main initializing function. 335 | void init( 336 | std::string filename, 337 | std::uint64_t total_buf_size_bytes, 338 | std::uint64_t n_buffers, 339 | std::uint64_t n_skip_items) { 340 | 341 | // Sanity check. 
342 | if (n_buffers == 0) { 343 | fprintf(stderr, "\nError in async_stream_reader: " 344 | "n_buffers == 0\n"); 345 | std::exit(EXIT_FAILURE); 346 | } 347 | 348 | // Open/assign the input file. 349 | if (filename.empty()) m_file = stdin; 350 | else m_file = utils::file_open_nobuf(filename.c_str(), "r"); 351 | 352 | // Reposition the file pointer if necessary. 353 | if (m_file != stdin && n_skip_items > 0) 354 | std::fseek(m_file, n_skip_items * sizeof(value_type), SEEK_SET); 355 | 356 | // Initialize counters. 357 | m_bytes_read = 0; 358 | m_cur_buffer_pos = 0; 359 | m_cur_buffer_filled = 0; 360 | m_cur_buffer = NULL; 361 | 362 | // Computer optimal buffer size. 363 | std::uint64_t buf_size_bytes = 364 | std::max((std::uint64_t)1, total_buf_size_bytes / n_buffers); 365 | std::uint64_t items_per_buf = 366 | utils::disk_block_size(buf_size_bytes); 367 | 368 | // Allocate buffers. 369 | m_mem = utils::allocate_array(n_buffers * items_per_buf); 370 | m_empty_buffers = new buffer_queue_type(n_buffers, items_per_buf, m_mem); 371 | m_full_buffers = new buffer_queue_type(0, 0, NULL); 372 | 373 | // Start the I/O thread. 374 | m_io_thread = new std::thread(io_thread_code, this); 375 | } 376 | 377 | // Return the next item in the stream. 378 | inline value_type read() { 379 | if (m_cur_buffer_pos == m_cur_buffer_filled) 380 | receive_new_buffer(); 381 | 382 | return m_cur_buffer->m_content[m_cur_buffer_pos++]; 383 | } 384 | 385 | // Read 'howmany' items into 'dest'. 
386 | void read(value_type *dest, std::uint64_t howmany) { 387 | while (howmany > 0) { 388 | if (m_cur_buffer_pos == m_cur_buffer_filled) 389 | receive_new_buffer(); 390 | 391 | std::uint64_t cur_buf_left = m_cur_buffer_filled - m_cur_buffer_pos; 392 | std::uint64_t tocopy = std::min(howmany, cur_buf_left); 393 | for (std::uint64_t i = 0; i < tocopy; ++i) 394 | dest[i] = m_cur_buffer->m_content[m_cur_buffer_pos + i]; 395 | m_cur_buffer_pos += tocopy; 396 | dest += tocopy; 397 | howmany -= tocopy; 398 | } 399 | } 400 | 401 | // Skip the next 'howmany' items in the stream. 402 | void skip(std::uint64_t howmany) { 403 | while (howmany > 0) { 404 | if (m_cur_buffer_pos == m_cur_buffer_filled) 405 | receive_new_buffer(); 406 | 407 | std::uint64_t toskip = std::min(howmany, 408 | m_cur_buffer_filled - m_cur_buffer_pos); 409 | m_cur_buffer_pos += toskip; 410 | howmany -= toskip; 411 | } 412 | } 413 | 414 | // Return the next item in the stream. 415 | inline value_type peek() { 416 | if (m_cur_buffer_pos == m_cur_buffer_filled) 417 | receive_new_buffer(); 418 | 419 | return m_cur_buffer->m_content[m_cur_buffer_pos]; 420 | } 421 | 422 | // True iff there are no more items in the stream. 423 | inline bool empty() { 424 | if (m_cur_buffer_pos == m_cur_buffer_filled) 425 | receive_new_buffer(); 426 | 427 | return (m_cur_buffer_pos == m_cur_buffer_filled); 428 | } 429 | 430 | // Return const ptr to internal buffer. 431 | const value_type *get_buf_ptr() const { 432 | return m_cur_buffer->m_content; 433 | } 434 | 435 | // Return the number of items in the internal buffer. 436 | std::uint64_t get_buf_filled() const { 437 | return m_cur_buffer_filled; 438 | } 439 | 440 | // Return the performed I/O in bytes. Unlike in the 441 | // writer classes (where m_bytes_written is updated 442 | // in the write methods), here m_bytes_read is updated 443 | // in the I/O thread. 
This is to correctly account 444 | // for the read-ahead operations in cases where user 445 | // did not read the whole file. In those cases, however, 446 | // the user must call the stop_reading() method before 447 | // calling bytes_read() to obtain the correct result. 448 | inline std::uint64_t bytes_read() const { 449 | return m_bytes_read; 450 | } 451 | 452 | // Stop the I/O thread, now the user can 453 | // cafely call the bytes_read() method. 454 | void stop_reading() { 455 | if (m_io_thread != NULL) { 456 | m_empty_buffers->send_stop_signal(); 457 | m_empty_buffers->m_cv.notify_one(); 458 | m_io_thread->join(); 459 | delete m_io_thread; 460 | m_io_thread = NULL; 461 | } 462 | } 463 | 464 | // Destructor. 465 | ~async_stream_reader() { 466 | stop_reading(); 467 | 468 | // Clean up. 469 | delete m_empty_buffers; 470 | delete m_full_buffers; 471 | if (m_file != stdin) 472 | std::fclose(m_file); 473 | 474 | if (m_cur_buffer != NULL) 475 | delete m_cur_buffer; 476 | 477 | utils::deallocate(m_mem); 478 | } 479 | }; 480 | 481 | } // namespace fsais_private 482 | 483 | #endif // __FSAIS_SRC_IO_ASYNC_STREAM_READER_HPP_INCLUDED 484 | -------------------------------------------------------------------------------- /src/fsais_src/io/async_stream_writer.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file fsais_src/io/async_stream_writer.hpp 3 | * @section LICENCE 4 | * 5 | * This file is part of fSAIS v0.1.0 6 | * See: https://github.com/dominikkempa/fsais 7 | * 8 | * Copyright (C) 2016-2020 9 | * Dominik Kempa 10 | * Juha Karkkainen 11 | * 12 | * Permission is hereby granted, free of charge, to any person 13 | * obtaining a copy of this software and associated documentation 14 | * files (the "Software"), to deal in the Software without 15 | * restriction, including without limitation the rights to use, 16 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | * copies of the Software, and to 
permit persons to whom the 18 | * Software is furnished to do so, subject to the following 19 | * conditions: 20 | * 21 | * The above copyright notice and this permission notice shall be 22 | * included in all copies or substantial portions of the Software. 23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 26 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 28 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 29 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 31 | * OTHER DEALINGS IN THE SOFTWARE. 32 | **/ 33 | 34 | #ifndef __FSAIS_SRC_IO_ASYNC_STREAM_WRITER_HPP_INCLUDED 35 | #define __FSAIS_SRC_IO_ASYNC_STREAM_WRITER_HPP_INCLUDED 36 | 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | 46 | #include "../utils.hpp" 47 | 48 | 49 | namespace fsais_private { 50 | 51 | template 52 | class async_stream_writer { 53 | private: 54 | template 55 | struct buffer { 56 | buffer(std::uint64_t size, T* const mem) 57 | : m_content(mem), m_size(size) { 58 | m_filled = 0; 59 | } 60 | 61 | void write_to_file(std::FILE *f) { 62 | utils::write_to_file(m_content, m_filled, f); 63 | m_filled = 0; 64 | } 65 | 66 | inline bool empty() const { 67 | return (m_filled == 0); 68 | } 69 | 70 | inline bool full() const { 71 | return (m_filled == m_size); 72 | } 73 | 74 | inline std::uint64_t size_in_bytes() const { 75 | return sizeof(T) * m_filled; 76 | } 77 | 78 | inline std::uint64_t free_space() const { 79 | return m_size - m_filled; 80 | } 81 | 82 | T* const m_content; 83 | const std::uint64_t m_size; 84 | 85 | std::uint64_t m_filled; 86 | }; 87 | 88 | template 89 | struct circular_queue { 90 | private: 91 | std::uint64_t m_size; 92 | std::uint64_t 
m_filled; 93 | std::uint64_t m_head; 94 | std::uint64_t m_tail; 95 | T *m_data; 96 | 97 | public: 98 | circular_queue() 99 | : m_size(1), 100 | m_filled(0), 101 | m_head(0), 102 | m_tail(0), 103 | m_data(new T[m_size]) {} 104 | 105 | inline void push(T x) { 106 | m_data[m_head++] = x; 107 | if (m_head == m_size) 108 | m_head = 0; 109 | ++m_filled; 110 | if (m_filled == m_size) 111 | enlarge(); 112 | } 113 | 114 | inline T &front() const { 115 | return m_data[m_tail]; 116 | } 117 | 118 | inline void pop() { 119 | ++m_tail; 120 | if (m_tail == m_size) 121 | m_tail = 0; 122 | --m_filled; 123 | } 124 | 125 | inline bool empty() const { 126 | return (m_filled == 0); 127 | } 128 | 129 | inline std::uint64_t size() const { 130 | return m_filled; 131 | } 132 | 133 | ~circular_queue() { 134 | delete[] m_data; 135 | } 136 | 137 | private: 138 | void enlarge() { 139 | T *new_data = new T[2 * m_size]; 140 | std::uint64_t left = m_filled; 141 | m_filled = 0; 142 | 143 | while (left > 0) { 144 | std::uint64_t tocopy = std::min(left, m_size - m_tail); 145 | std::copy(m_data + m_tail, 146 | m_data + m_tail + tocopy, new_data + m_filled); 147 | 148 | m_tail += tocopy; 149 | if (m_tail == m_size) 150 | m_tail = 0; 151 | left -= tocopy; 152 | m_filled += tocopy; 153 | } 154 | 155 | m_head = m_filled; 156 | m_tail = 0; 157 | m_size <<= 1; 158 | std::swap(m_data, new_data); 159 | delete[] new_data; 160 | } 161 | }; 162 | 163 | template 164 | struct buffer_queue { 165 | typedef buffer buffer_type; 166 | 167 | buffer_queue( 168 | std::uint64_t n_buffers, 169 | std::uint64_t items_per_buf, 170 | T *mem) { 171 | m_signal_stop = false; 172 | for (std::uint64_t i = 0; i < n_buffers; ++i) { 173 | m_queue.push(new buffer_type(items_per_buf, mem)); 174 | mem += items_per_buf; 175 | } 176 | } 177 | 178 | ~buffer_queue() { 179 | while (!m_queue.empty()) { 180 | buffer_type *buf = m_queue.front(); 181 | m_queue.pop(); 182 | delete buf; 183 | } 184 | } 185 | 186 | buffer_type *pop() { 187 | 
buffer_type *ret = m_queue.front(); 188 | m_queue.pop(); 189 | return ret; 190 | } 191 | 192 | void push(buffer_type *buf) { 193 | std::lock_guard lk(m_mutex); 194 | m_queue.push(buf); 195 | } 196 | 197 | void send_stop_signal() { 198 | std::lock_guard lk(m_mutex); 199 | m_signal_stop = true; 200 | } 201 | 202 | inline bool empty() const { 203 | return m_queue.empty(); 204 | } 205 | 206 | circular_queue m_queue; // Must have FIFO property 207 | std::condition_variable m_cv; 208 | std::mutex m_mutex; 209 | bool m_signal_stop; 210 | }; 211 | 212 | private: 213 | typedef buffer buffer_type; 214 | typedef buffer_queue buffer_queue_type; 215 | 216 | buffer_queue_type *m_empty_buffers; 217 | buffer_queue_type *m_full_buffers; 218 | 219 | private: 220 | template 221 | static void io_thread_code(async_stream_writer *caller) { 222 | typedef buffer buffer_type; 223 | while (true) { 224 | 225 | // Wait for the full buffer (or a stop signal). 226 | std::unique_lock lk(caller->m_full_buffers->m_mutex); 227 | while (caller->m_full_buffers->empty() && 228 | !(caller->m_full_buffers->m_signal_stop)) 229 | caller->m_full_buffers->m_cv.wait(lk); 230 | 231 | if (caller->m_full_buffers->empty()) { 232 | // We received the stop signal -- exit. 233 | lk.unlock(); 234 | break; 235 | } 236 | 237 | // Extract the buffer from the collection. 238 | buffer_type *buffer = caller->m_full_buffers->pop(); 239 | lk.unlock(); 240 | 241 | // Write the data to disk. 242 | buffer->write_to_file(caller->m_file); 243 | 244 | // Add the (now empty) buffer to the collection 245 | // of empty buffers and notify the waiting thread. 246 | caller->m_empty_buffers->push(buffer); 247 | caller->m_empty_buffers->m_cv.notify_one(); 248 | } 249 | } 250 | 251 | // Get an empty buffer from the collection of empty buffers. 
252 | buffer_type* get_empty_buffer() { 253 | std::unique_lock lk(m_empty_buffers->m_mutex); 254 | while (m_empty_buffers->empty()) 255 | m_empty_buffers->m_cv.wait(lk); 256 | buffer_type *ret = m_empty_buffers->pop(); 257 | lk.unlock(); 258 | return ret; 259 | } 260 | 261 | private: 262 | std::FILE *m_file; 263 | 264 | std::uint64_t m_bytes_written; 265 | std::uint64_t m_items_per_buf; 266 | 267 | value_type *m_mem; 268 | buffer_type *m_cur_buffer; 269 | std::thread *m_io_thread; 270 | 271 | public: 272 | 273 | // Constructor, given buffer sizes and write mode. 274 | async_stream_writer(std::string filename, 275 | std::uint64_t total_buf_size_bytes, 276 | std::uint64_t n_buffers, 277 | std::string write_mode) { 278 | init(filename, total_buf_size_bytes, n_buffers, write_mode); 279 | } 280 | 281 | // Constructor, default write mode, given buffer sizes. 282 | async_stream_writer(std::string filename, 283 | std::uint64_t total_buf_size_bytes, 284 | std::uint64_t n_buffers) { 285 | init(filename, total_buf_size_bytes, n_buffers, "w"); 286 | } 287 | 288 | // Constructor, default buffer sizes, given write mode. 289 | async_stream_writer(std::string filename, 290 | std::string write_mode) { 291 | init(filename, 8 << 20, 4, write_mode); 292 | } 293 | 294 | // Constructor, default buffer sizes, default write mode. 295 | async_stream_writer(std::string filename) { 296 | init(filename, 8 << 20, 4, "w"); 297 | } 298 | 299 | // Default constructor, writes to stdout. 300 | async_stream_writer() { 301 | init("", 8 << 20, 4, "w"); 302 | } 303 | 304 | // Main initializing function. 305 | void init( 306 | std::string filename, 307 | std::uint64_t total_buf_size_bytes, 308 | std::uint64_t n_buffers, 309 | std::string write_mode) { 310 | 311 | // Sanity check. 312 | if (n_buffers == 0) { 313 | fprintf(stderr, "\nError in async_stream_writer: " 314 | "n_buffers == 0\n"); 315 | std::exit(EXIT_FAILURE); 316 | } 317 | 318 | // Open/assign the output file. 
319 | if (filename.empty()) m_file = stdout; 320 | else m_file = utils::file_open_nobuf(filename.c_str(), write_mode); 321 | 322 | // Computer optimal buffer size. 323 | std::uint64_t buf_size_bytes = 324 | std::max((std::uint64_t)1, total_buf_size_bytes / n_buffers); 325 | m_items_per_buf = utils::disk_block_size(buf_size_bytes); 326 | 327 | // Allocate buffers. 328 | m_mem = utils::allocate_array(n_buffers * m_items_per_buf); 329 | m_empty_buffers = new buffer_queue_type(n_buffers, 330 | m_items_per_buf, m_mem); 331 | m_full_buffers = new buffer_queue_type(0, 0, NULL); 332 | 333 | // Initialize empty buffer. 334 | m_cur_buffer = get_empty_buffer(); 335 | m_bytes_written = 0; 336 | 337 | // Start the I/O thread. 338 | m_io_thread = new std::thread(io_thread_code, this); 339 | } 340 | 341 | // Write given item to the stream. 342 | inline void write(value_type value) { 343 | 344 | // We count I/O volume here (and not in the thread doing I/O) to 345 | // avoid the situation, where user call bytes_written(), but the 346 | // I/O thread is still writing the last buffer. 347 | m_bytes_written += sizeof(value_type); 348 | m_cur_buffer->m_content[m_cur_buffer->m_filled++] = value; 349 | if (m_cur_buffer->full()) { 350 | m_full_buffers->push(m_cur_buffer); 351 | m_full_buffers->m_cv.notify_one(); 352 | m_cur_buffer = get_empty_buffer(); 353 | } 354 | } 355 | 356 | // Write values[0..length) to the stream. 
357 | inline void write(const value_type *values, std::uint64_t length) { 358 | m_bytes_written += length * sizeof(value_type); 359 | while (length > 0) { 360 | std::uint64_t tocopy = std::min(length, m_cur_buffer->free_space()); 361 | std::copy(values, values + tocopy, 362 | m_cur_buffer->m_content + m_cur_buffer->m_filled); 363 | m_cur_buffer->m_filled += tocopy; 364 | values += tocopy; 365 | length -= tocopy; 366 | if (m_cur_buffer->full()) { 367 | m_full_buffers->push(m_cur_buffer); 368 | m_full_buffers->m_cv.notify_one(); 369 | m_cur_buffer = get_empty_buffer(); 370 | } 371 | } 372 | } 373 | 374 | // Return performed I/O in bytes. 375 | inline std::uint64_t bytes_written() const { 376 | return m_bytes_written; 377 | } 378 | 379 | // Destructor. 380 | ~async_stream_writer() { 381 | 382 | // Send the last incomplete buffer for writing. 383 | if (!(m_cur_buffer->empty())) { 384 | m_full_buffers->push(m_cur_buffer); 385 | m_full_buffers->m_cv.notify_one(); 386 | m_cur_buffer = NULL; 387 | } 388 | 389 | // Let the I/O thread know that we're done. 390 | m_full_buffers->send_stop_signal(); 391 | m_full_buffers->m_cv.notify_one(); 392 | 393 | // Wait for the I/O thread to finish. 394 | m_io_thread->join(); 395 | 396 | // Delete buffers and close the file. 
397 | delete m_io_thread; 398 | delete m_full_buffers; 399 | delete m_empty_buffers; 400 | 401 | if (m_file != stdout) 402 | std::fclose(m_file); 403 | 404 | if (m_cur_buffer != NULL) 405 | delete m_cur_buffer; 406 | 407 | utils::deallocate(m_mem); 408 | } 409 | }; 410 | 411 | } // namespace fsais_private 412 | 413 | #endif // __FSAIS_SRC_IO_ASYNC_STREAM_WRITER_HPP_INCLUDED 414 | -------------------------------------------------------------------------------- /src/fsais_src/io/async_stream_writer_multipart.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file fsais_src/io/async_stream_writer_multipart.hpp 3 | * @section LICENCE 4 | * 5 | * This file is part of fSAIS v0.1.0 6 | * See: https://github.com/dominikkempa/fsais 7 | * 8 | * Copyright (C) 2016-2020 9 | * Dominik Kempa 10 | * Juha Karkkainen 11 | * 12 | * Permission is hereby granted, free of charge, to any person 13 | * obtaining a copy of this software and associated documentation 14 | * files (the "Software"), to deal in the Software without 15 | * restriction, including without limitation the rights to use, 16 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | * copies of the Software, and to permit persons to whom the 18 | * Software is furnished to do so, subject to the following 19 | * conditions: 20 | * 21 | * The above copyright notice and this permission notice shall be 22 | * included in all copies or substantial portions of the Software. 23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 26 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 28 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 29 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 31 | * OTHER DEALINGS IN THE SOFTWARE. 32 | **/ 33 | 34 | #ifndef __FSAIS_SRC_IO_ASYNC_STREAM_WRITER_MULTIPART_HPP_INCLUDED 35 | #define __FSAIS_SRC_IO_ASYNC_STREAM_WRITER_MULTIPART_HPP_INCLUDED 36 | 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | 46 | #include "../utils.hpp" 47 | 48 | 49 | namespace fsais_private { 50 | 51 | template 52 | class async_stream_writer_multipart { 53 | private: 54 | template 55 | struct buffer { 56 | buffer(std::uint64_t size, T* const mem) 57 | : m_content(mem), m_size(size) { 58 | m_filled = 0; 59 | } 60 | 61 | inline bool empty() const { 62 | return (m_filled == 0); 63 | } 64 | 65 | inline bool full() const { 66 | return (m_filled == m_size); 67 | } 68 | 69 | inline std::uint64_t size_in_bytes() const { 70 | return sizeof(T) * m_filled; 71 | } 72 | 73 | inline std::uint64_t free_space() const { 74 | return m_size - m_filled; 75 | } 76 | 77 | T* const m_content; 78 | const std::uint64_t m_size; 79 | 80 | std::uint64_t m_filled; 81 | }; 82 | 83 | template 84 | struct circular_queue { 85 | private: 86 | std::uint64_t m_size; 87 | std::uint64_t m_filled; 88 | std::uint64_t m_head; 89 | std::uint64_t m_tail; 90 | T *m_data; 91 | 92 | public: 93 | circular_queue() 94 | : m_size(1), 95 | m_filled(0), 96 | m_head(0), 97 | m_tail(0), 98 | m_data(new T[m_size]) {} 99 | 100 | inline void push(T x) { 101 | m_data[m_head++] = x; 102 | if (m_head == m_size) 103 | m_head = 0; 104 | ++m_filled; 105 | if (m_filled == m_size) 106 | enlarge(); 107 | } 108 | 109 | inline T &front() const { 110 | return m_data[m_tail]; 111 | } 112 | 113 | inline void pop() { 114 | ++m_tail; 115 | if (m_tail == m_size) 116 | m_tail = 0; 117 | --m_filled; 118 | 
} 119 | 120 | inline bool empty() const { 121 | return (m_filled == 0); 122 | } 123 | 124 | inline std::uint64_t size() const { 125 | return m_filled; 126 | } 127 | 128 | ~circular_queue() { 129 | delete[] m_data; 130 | } 131 | 132 | private: 133 | void enlarge() { 134 | T *new_data = new T[2 * m_size]; 135 | std::uint64_t left = m_filled; 136 | m_filled = 0; 137 | 138 | while (left > 0) { 139 | std::uint64_t tocopy = std::min(left, m_size - m_tail); 140 | std::copy(m_data + m_tail, 141 | m_data + m_tail + tocopy, new_data + m_filled); 142 | 143 | m_tail += tocopy; 144 | if (m_tail == m_size) 145 | m_tail = 0; 146 | left -= tocopy; 147 | m_filled += tocopy; 148 | } 149 | 150 | m_head = m_filled; 151 | m_tail = 0; 152 | m_size <<= 1; 153 | std::swap(m_data, new_data); 154 | delete[] new_data; 155 | } 156 | }; 157 | 158 | template 159 | struct buffer_queue { 160 | typedef buffer buffer_type; 161 | 162 | buffer_queue( 163 | std::uint64_t n_buffers, 164 | std::uint64_t items_per_buf, 165 | T *mem) { 166 | m_signal_stop = false; 167 | for (std::uint64_t i = 0; i < n_buffers; ++i) { 168 | m_queue.push(new buffer_type(items_per_buf, mem)); 169 | mem += items_per_buf; 170 | } 171 | } 172 | 173 | ~buffer_queue() { 174 | while (!m_queue.empty()) { 175 | buffer_type *buf = m_queue.front(); 176 | m_queue.pop(); 177 | delete buf; 178 | } 179 | } 180 | 181 | buffer_type *pop() { 182 | buffer_type *ret = m_queue.front(); 183 | m_queue.pop(); 184 | return ret; 185 | } 186 | 187 | void push(buffer_type *buf) { 188 | std::lock_guard lk(m_mutex); 189 | m_queue.push(buf); 190 | } 191 | 192 | void send_stop_signal() { 193 | std::lock_guard lk(m_mutex); 194 | m_signal_stop = true; 195 | } 196 | 197 | inline bool empty() const { 198 | return m_queue.empty(); 199 | } 200 | 201 | circular_queue m_queue; // Must have FIFO property 202 | std::condition_variable m_cv; 203 | std::mutex m_mutex; 204 | bool m_signal_stop; 205 | }; 206 | 207 | private: 208 | typedef buffer buffer_type; 209 | 
typedef buffer_queue buffer_queue_type; 210 | 211 | buffer_queue_type *m_empty_buffers; 212 | buffer_queue_type *m_full_buffers; 213 | 214 | private: 215 | template 216 | static void io_thread_code(async_stream_writer_multipart *caller) { 217 | typedef buffer buffer_type; 218 | while (true) { 219 | 220 | // Wait for the full buffer (or a stop signal). 221 | std::unique_lock lk(caller->m_full_buffers->m_mutex); 222 | while (caller->m_full_buffers->empty() && 223 | !(caller->m_full_buffers->m_signal_stop)) 224 | caller->m_full_buffers->m_cv.wait(lk); 225 | 226 | if (caller->m_full_buffers->empty()) { 227 | // We received the stop signal -- exit. 228 | lk.unlock(); 229 | break; 230 | } 231 | 232 | // Extract the buffer from the collection. 233 | buffer_type *buffer = caller->m_full_buffers->pop(); 234 | lk.unlock(); 235 | 236 | // Write the data to disk. 237 | const T *ptr = buffer->m_content; 238 | while (buffer->m_filled > 0) { 239 | if (caller->m_file == NULL || caller->m_cur_part_items_written == caller->m_single_part_max_items) { 240 | if (caller->m_file != NULL) { 241 | std::fclose(caller->m_file); 242 | ++caller->m_cur_part; 243 | } else caller->m_cur_part = 0; 244 | std::string cur_part_filename = caller->m_filename + 245 | ".multipart_file.part" + utils::intToStr(caller->m_cur_part); 246 | caller->m_file = utils::file_open(cur_part_filename, "w"); 247 | caller->m_cur_part_items_written = 0; 248 | } 249 | 250 | std::uint64_t cur_part_items_left = caller->m_single_part_max_items - caller->m_cur_part_items_written; 251 | std::uint64_t towrite = std::min(cur_part_items_left, buffer->m_filled); 252 | utils::write_to_file(ptr, towrite, caller->m_file); 253 | caller->m_cur_part_items_written += towrite; 254 | buffer->m_filled -= towrite; 255 | ptr += towrite; 256 | } 257 | 258 | // Add the (now empty) buffer to the collection 259 | // of empty buffers and notify the waiting thread. 
260 | caller->m_empty_buffers->push(buffer); 261 | caller->m_empty_buffers->m_cv.notify_one(); 262 | } 263 | } 264 | 265 | // Get an empty buffer from the collection of empty buffers. 266 | buffer_type* get_empty_buffer() { 267 | std::unique_lock lk(m_empty_buffers->m_mutex); 268 | while (m_empty_buffers->empty()) 269 | m_empty_buffers->m_cv.wait(lk); 270 | buffer_type *ret = m_empty_buffers->pop(); 271 | lk.unlock(); 272 | return ret; 273 | } 274 | 275 | private: 276 | std::FILE *m_file; 277 | std::string m_filename; 278 | 279 | std::uint64_t m_cur_part; 280 | std::uint64_t m_single_part_max_items; 281 | std::uint64_t m_cur_part_items_written; 282 | std::uint64_t m_bytes_written; 283 | std::uint64_t m_items_per_buf; 284 | 285 | value_type *m_mem; 286 | buffer_type *m_cur_buffer; 287 | std::thread *m_io_thread; 288 | 289 | public: 290 | async_stream_writer_multipart(std::string filename, 291 | std::uint64_t single_part_max_bytes) { 292 | init(filename, single_part_max_bytes, (8UL << 20), 4UL); 293 | } 294 | 295 | async_stream_writer_multipart(std::string filename, 296 | std::uint64_t single_part_max_bytes, 297 | std::uint64_t total_buf_size_bytes, 298 | std::uint64_t n_buffers) { 299 | init(filename, single_part_max_bytes, total_buf_size_bytes, n_buffers); 300 | } 301 | 302 | void init(std::string filename, 303 | std::uint64_t single_part_max_bytes, 304 | std::uint64_t total_buf_size_bytes, 305 | std::uint64_t n_buffers) { 306 | 307 | // Sanity check. 308 | if (n_buffers == 0) { 309 | fprintf(stderr, "\nError in async_stream_writer_multipart: n_buffers == 0\n"); 310 | std::exit(EXIT_FAILURE); 311 | } 312 | 313 | m_filename = filename; 314 | 315 | // Initialize basic parameters. Note: if no items are 316 | // written, this class does not create any files. 317 | m_single_part_max_items = std::max(1UL, single_part_max_bytes / sizeof(value_type)); 318 | m_file = NULL; 319 | 320 | // Computer optimal buffer size. 
321 | std::uint64_t buf_size_bytes = 322 | std::max((std::uint64_t)1, total_buf_size_bytes / n_buffers); 323 | m_items_per_buf = utils::disk_block_size(buf_size_bytes); 324 | 325 | // Allocate buffers. 326 | m_mem = utils::allocate_array(n_buffers * m_items_per_buf); 327 | m_empty_buffers = new buffer_queue_type(n_buffers, 328 | m_items_per_buf, m_mem); 329 | m_full_buffers = new buffer_queue_type(0, 0, NULL); 330 | 331 | // Initialize empty buffer. 332 | m_cur_buffer = get_empty_buffer(); 333 | m_bytes_written = 0; 334 | 335 | // Start the I/O thread. 336 | m_io_thread = new std::thread(io_thread_code, this); 337 | } 338 | 339 | // Write given item to the stream. 340 | inline void write(value_type value) { 341 | 342 | // We count I/O volume here (and not in the thread doing I/O) to 343 | // avoid the situation, where user call bytes_written(), but the 344 | // I/O thread is still writing the last buffer. 345 | m_bytes_written += sizeof(value_type); 346 | m_cur_buffer->m_content[m_cur_buffer->m_filled++] = value; 347 | if (m_cur_buffer->full()) 348 | flush(); 349 | } 350 | 351 | // Write values[0..length) to the stream. 352 | inline void write(const value_type *values, std::uint64_t length) { 353 | m_bytes_written += length * sizeof(value_type); 354 | while (length > 0) { 355 | std::uint64_t tocopy = std::min(length, m_cur_buffer->free_space()); 356 | std::copy(values, values + tocopy, 357 | m_cur_buffer->m_content + m_cur_buffer->m_filled); 358 | m_cur_buffer->m_filled += tocopy; 359 | values += tocopy; 360 | length -= tocopy; 361 | if (m_cur_buffer->full()) 362 | flush(); 363 | } 364 | } 365 | 366 | // Return performed I/O in bytes. 367 | inline std::uint64_t bytes_written() const { 368 | return m_bytes_written; 369 | } 370 | 371 | // It's safe to call if the buffer is not full, though 372 | // in principle should only be called internally. Calling 373 | // it too often will lead to poor I/O performance. 
374 | void flush() { 375 | if (!m_cur_buffer->empty()) { 376 | m_full_buffers->push(m_cur_buffer); 377 | m_full_buffers->m_cv.notify_one(); 378 | m_cur_buffer = get_empty_buffer(); 379 | } 380 | } 381 | 382 | inline std::uint64_t get_parts_count() const { 383 | std::uint64_t items_written = m_bytes_written / sizeof(value_type); 384 | std::uint64_t n_parts = (items_written + m_single_part_max_items - 1) / m_single_part_max_items; 385 | return n_parts; 386 | } 387 | 388 | ~async_stream_writer_multipart() { 389 | // Send the last incomplete buffer for writing. 390 | flush(); 391 | 392 | // Let the I/O thread know that we're done. 393 | m_full_buffers->send_stop_signal(); 394 | m_full_buffers->m_cv.notify_one(); 395 | 396 | // Wait for the I/O thread to finish. 397 | m_io_thread->join(); 398 | 399 | // Clean up. 400 | delete m_empty_buffers; 401 | delete m_full_buffers; 402 | delete m_io_thread; 403 | if (m_file != NULL) 404 | std::fclose(m_file); 405 | delete m_cur_buffer; 406 | utils::deallocate(m_mem); 407 | } 408 | }; 409 | 410 | } // namespace fsais_private 411 | 412 | #endif // __FSAIS_SRC_IO_ASYNC_STREAM_WRITER_MULTIPART_HPP_INCLUDED 413 | -------------------------------------------------------------------------------- /src/fsais_src/io/simple_accessor.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file fsais_src/io/simple_accessor.hpp 3 | * @section LICENCE 4 | * 5 | * This file is part of fSAIS v0.1.0 6 | * See: https://github.com/dominikkempa/fsais 7 | * 8 | * Copyright (C) 2016-2020 9 | * Dominik Kempa 10 | * Juha Karkkainen 11 | * 12 | * Permission is hereby granted, free of charge, to any person 13 | * obtaining a copy of this software and associated documentation 14 | * files (the "Software"), to deal in the Software without 15 | * restriction, including without limitation the rights to use, 16 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | * copies of the Software, and to 
permit persons to whom the 18 | * Software is furnished to do so, subject to the following 19 | * conditions: 20 | * 21 | * The above copyright notice and this permission notice shall be 22 | * included in all copies or substantial portions of the Software. 23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 26 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 28 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 29 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 31 | * OTHER DEALINGS IN THE SOFTWARE. 32 | **/ 33 | 34 | #ifndef __FSAIS_SRC_IO_SIMPLE_ACCESSOR_HPP_INCLUDED 35 | #define __FSAIS_SRC_IO_SIMPLE_ACCESSOR_HPP_INCLUDED 36 | 37 | #include 38 | #include 39 | 40 | #include "../utils.hpp" 41 | 42 | 43 | namespace fsais_private { 44 | 45 | template 46 | class simple_accessor { 47 | public: 48 | typedef ValueType value_type; 49 | 50 | private: 51 | std::uint64_t m_bytes_read; 52 | std::uint64_t m_file_items; 53 | std::uint64_t m_items_per_buf; 54 | std::uint64_t m_buf_pos; 55 | std::uint64_t m_buf_filled; 56 | 57 | value_type *m_buf; 58 | std::FILE *m_file; 59 | 60 | public: 61 | simple_accessor(std::string filename, std::uint64_t bufsize = (2UL << 20)) { 62 | m_items_per_buf = utils::disk_block_size(bufsize); 63 | m_file_items = utils::file_size(filename) / sizeof(value_type); 64 | m_file = utils::file_open_nobuf(filename, "r"); 65 | 66 | m_buf = utils::allocate_array(m_items_per_buf); 67 | m_buf_filled = 0; 68 | m_buf_pos = 0; 69 | m_bytes_read = 0; 70 | } 71 | 72 | inline value_type access(std::uint64_t i) { 73 | if (!(m_buf_pos <= i && i < m_buf_pos + m_buf_filled)) { 74 | if (i >= m_items_per_buf / 2) m_buf_pos = i - m_items_per_buf / 2; 75 | else m_buf_pos = 0; 76 | m_buf_filled = 
std::min(m_file_items - m_buf_pos, m_items_per_buf); 77 | std::fseek(m_file, m_buf_pos * sizeof(value_type), SEEK_SET); 78 | utils::read_from_file(m_buf, m_buf_filled, m_file); 79 | m_bytes_read += m_buf_filled * sizeof(value_type); 80 | } 81 | 82 | return m_buf[i - m_buf_pos]; 83 | } 84 | 85 | inline std::uint64_t bytes_read() const { 86 | return m_bytes_read; 87 | } 88 | 89 | ~simple_accessor() { 90 | std::fclose(m_file); 91 | utils::deallocate(m_buf); 92 | } 93 | }; 94 | 95 | } // namespace fsais_private 96 | 97 | #endif // __FSAIS_SRC_IO_SIMPLE_ACCESSOR_HPP_INCLUDED 98 | -------------------------------------------------------------------------------- /src/fsais_src/naive_compute_sa.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file fsais_src/naive_compute_sa.hpp 3 | * @section LICENCE 4 | * 5 | * This file is part of fSAIS v0.1.0 6 | * See: https://github.com/dominikkempa/fsais 7 | * 8 | * Copyright (C) 2016-2020 9 | * Dominik Kempa 10 | * Juha Karkkainen 11 | * 12 | * Permission is hereby granted, free of charge, to any person 13 | * obtaining a copy of this software and associated documentation 14 | * files (the "Software"), to deal in the Software without 15 | * restriction, including without limitation the rights to use, 16 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | * copies of the Software, and to permit persons to whom the 18 | * Software is furnished to do so, subject to the following 19 | * conditions: 20 | * 21 | * The above copyright notice and this permission notice shall be 22 | * included in all copies or substantial portions of the Software. 23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 26 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. 
namespace fsais_private {
namespace naive_compute_sa {

// A copy of text[beg..beg+length) together with its starting
// position. Objects are ordered lexicographically by their symbols
// (compared as std::uint64_t); a proper prefix precedes any of its
// extensions.
template<typename char_type>
class substring {
  public:
    typedef substring<char_type> substring_type;

    substring() : m_beg(0), m_text_length(0) {}

    // Copy `length` symbols of `text` starting at position `beg`.
    // `text_length` is recorded in m_text_length.
    substring(
        const char_type * const text,
        const std::uint64_t beg,
        const std::uint64_t length,
        const std::uint64_t text_length)
      : m_beg(beg),
        m_text_length(text_length),
        m_data(text + beg, text + beg + length) {}

    // Strict lexicographic order. The comparison never reads past
    // the stored symbols and `x < x` is false, as std::sort requires
    // of a strict weak ordering.
    inline bool operator < (const substring_type &s) const {
      const std::uint64_t common =
        std::min<std::uint64_t>(m_data.size(), s.m_data.size());

      // Length of the longest common prefix.
      std::uint64_t lcp = 0;
      while (lcp < common && m_data[lcp] == s.m_data[lcp])
        ++lcp;

      // One string is a prefix of the other: the shorter is smaller.
      if (lcp == common)
        return m_data.size() < s.m_data.size();

      return static_cast<std::uint64_t>(m_data[lcp]) <
        static_cast<std::uint64_t>(s.m_data[lcp]);
    }

    std::uint64_t m_beg;
    std::uint64_t m_text_length;
    std::vector<char_type> m_data;
};

// Compute the suffix array of text[0..text_length) by explicitly
// sorting all suffixes and store it in sa[0..text_length).
//
// Every suffix is materialized, so time and memory are quadratic in
// text_length.
template<
  typename char_type,
  typename text_offset_type>
void naive_compute_sa(
    const char_type * const text,
    const std::uint64_t text_length,
    text_offset_type * const sa) {

  typedef substring<char_type> substring_type;
  std::vector<substring_type> substrings;
  substrings.reserve(text_length);

  for (std::uint64_t i = 0; i < text_length; ++i)
    substrings.push_back(
        substring_type(text, i, text_length - i, text_length));

  std::sort(substrings.begin(), substrings.end());
  for (std::uint64_t i = 0; i < text_length; ++i)
    sa[i] = substrings[i].m_beg;
}

}  // namespace naive_compute_sa
}  // namespace fsais_private
namespace fsais_private {

// A pair of values stored without padding between or after the
// members (GCC/Clang `packed` attribute).
template<typename S, typename T>
struct packed_pair {
  packed_pair() {}

  // A single by-value constructor. Having an additional (S&, T&)
  // overload next to this one makes every call with two non-const
  // lvalues ambiguous; this overload alone accepts lvalues, const
  // values and temporaries.
  packed_pair(S f, T s) {
    first = f;
    second = s;
  }

  S first;
  T second;
} __attribute__((packed));

}  // namespace fsais_private
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 28 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 29 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 31 | * OTHER DEALINGS IN THE SOFTWARE. 32 | **/ 33 | 34 | #ifndef __FSAIS_SRC_RADIX_HEAP_HPP_INCLUDED 35 | #define __FSAIS_SRC_RADIX_HEAP_HPP_INCLUDED 36 | 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | 46 | #include "utils.hpp" 47 | 48 | 49 | namespace fsais_private { 50 | 51 | template 52 | class radix_heap { 53 | static_assert(sizeof(KeyType) <= 8, 54 | "radix_heap: sizeof(KeyType) > 8!"); 55 | static_assert(std::is_unsigned::value, 56 | "radix_heap: KeyType not unsigned!"); 57 | 58 | public: 59 | typedef KeyType key_type; 60 | typedef ValueType value_type; 61 | typedef radix_heap radix_heap_type; 62 | 63 | private: 64 | template 65 | struct packed_pair { 66 | packed_pair() {} 67 | packed_pair(S &f, T &s) { 68 | first = f; 69 | second = s; 70 | } 71 | 72 | S first; 73 | T second; 74 | } __attribute__((packed)); 75 | 76 | private: 77 | struct queue_header { 78 | std::uint64_t m_head_page_id; 79 | std::uint64_t m_tail_page_id; 80 | std::uint64_t m_head_ptr; 81 | std::uint64_t m_tail_ptr; 82 | } __attribute__((packed)); 83 | 84 | private: 85 | typedef packed_pair pair_type; 86 | 87 | private: 88 | std::uint64_t m_size; 89 | std::uint64_t m_key_lower_bound; 90 | std::uint64_t m_bottom_level_queue_ptr; 91 | std::uint64_t m_min_compare_ptr; 92 | std::uint64_t m_queue_count; 93 | std::uint64_t m_bottom_level_radix; 94 | std::uint64_t m_pagesize; 95 | 96 | // Internal queue minimas. 97 | std::vector m_queue_min; 98 | 99 | // Lookup tables used to compute bucket ID. 
100 | std::vector m_bin_len_to_level_id; 101 | std::vector m_level_mask; 102 | std::vector m_sum_of_radix_logs; 103 | std::vector m_sum_of_radixes; 104 | 105 | // Pointers used to locate the smallest non-empty queue. 106 | std::vector m_level_ptr; 107 | 108 | // Internal queues. 109 | std::uint64_t m_empty_pages_list_head; 110 | std::uint64_t *m_pages_next; 111 | pair_type *m_pages_mem; 112 | queue_header *m_queue_headers; 113 | 114 | private: 115 | inline bool is_internal_queue_empty(std::uint64_t queue_id) const { 116 | queue_header &h = m_queue_headers[queue_id]; 117 | return (h.m_tail_page_id == std::numeric_limits::max()) || 118 | (h.m_tail_page_id == h.m_head_page_id && h.m_tail_ptr == h.m_head_ptr); 119 | } 120 | 121 | inline pair_type& internal_queue_front(std::uint64_t queue_id) const { 122 | queue_header &h = m_queue_headers[queue_id]; 123 | return m_pages_mem[h.m_tail_page_id * m_pagesize + h.m_tail_ptr]; 124 | } 125 | 126 | inline void internal_queue_pop(std::uint64_t queue_id) { 127 | queue_header &h = m_queue_headers[queue_id]; 128 | ++h.m_tail_ptr; 129 | if (h.m_tail_ptr == m_pagesize) { 130 | std::uint64_t next_tail_page_id = m_pages_next[h.m_tail_page_id]; 131 | m_pages_next[h.m_tail_page_id] = m_empty_pages_list_head; 132 | m_empty_pages_list_head = h.m_tail_page_id; 133 | h.m_tail_page_id = next_tail_page_id; 134 | h.m_tail_ptr = 0; 135 | } else if (h.m_tail_ptr == h.m_head_ptr && h.m_tail_page_id == h.m_head_page_id) { 136 | m_pages_next[h.m_tail_page_id] = m_empty_pages_list_head; 137 | m_empty_pages_list_head = h.m_tail_page_id; 138 | h.m_tail_page_id = std::numeric_limits::max(); 139 | h.m_head_page_id = std::numeric_limits::max(); 140 | } 141 | } 142 | 143 | inline void internal_queue_push(std::uint64_t queue_id, pair_type x) { 144 | queue_header &h = m_queue_headers[queue_id]; 145 | if (h.m_head_page_id == std::numeric_limits::max()) { 146 | h.m_head_page_id = m_empty_pages_list_head; 147 | m_empty_pages_list_head = 
m_pages_next[m_empty_pages_list_head]; 148 | m_pages_next[h.m_head_page_id] = std::numeric_limits::max(); 149 | h.m_tail_page_id = h.m_head_page_id; 150 | h.m_head_ptr = 0; 151 | h.m_tail_ptr = 0; 152 | } 153 | 154 | m_pages_mem[h.m_head_page_id * m_pagesize + h.m_head_ptr++] = x; 155 | if (h.m_head_ptr == m_pagesize) { 156 | std::uint64_t new_head_page_id = m_empty_pages_list_head; 157 | m_empty_pages_list_head = m_pages_next[m_empty_pages_list_head]; 158 | m_pages_next[new_head_page_id] = std::numeric_limits::max(); 159 | m_pages_next[h.m_head_page_id] = new_head_page_id; 160 | h.m_head_page_id = new_head_page_id; 161 | h.m_head_ptr = 0; 162 | } 163 | } 164 | 165 | public: 166 | radix_heap(std::vector radix_logs, 167 | std::uint64_t max_items, 168 | std::uint64_t pagesize = 169 | #ifdef SAIS_DEBUG 170 | (std::uint64_t)1 171 | #else 172 | (std::uint64_t)4096 173 | #endif 174 | ) { 175 | m_pagesize = pagesize; 176 | std::uint64_t radix_logs_sum = std::accumulate(radix_logs.begin(), radix_logs.end(), 0UL); 177 | if (radix_logs_sum == 0) { 178 | fprintf(stderr, "\nError: radix_logs_sum == 0 in radix_heap constructor!\n"); 179 | std::exit(EXIT_FAILURE); 180 | } 181 | 182 | // Compute m_level_mask lookup table. 183 | m_level_mask = std::vector(radix_logs.size()); 184 | for (std::uint64_t i = 0; i < radix_logs.size(); ++i) 185 | m_level_mask[i] = (1UL << radix_logs[radix_logs.size() - 1 - i]) - 1; 186 | 187 | // Compute m_bin_len_to_level_id lookup table. 188 | m_bin_len_to_level_id = std::vector(radix_logs_sum + 1); 189 | std::uint64_t level_cnt = 0, ptr = 0; 190 | for (std::uint64_t i = radix_logs.size(); i > 0; --i) { 191 | for (std::uint64_t j = 0; j < radix_logs[i - 1]; ++j) 192 | m_bin_len_to_level_id[++ptr] = level_cnt; 193 | ++level_cnt; 194 | } 195 | 196 | // Compute m_sum_of_radix_logs lookup table. 
197 | m_sum_of_radix_logs = std::vector(radix_logs.size()); 198 | for (std::uint64_t i = 0, s = 0; i < radix_logs.size(); ++i) { 199 | m_sum_of_radix_logs[i] = s; 200 | s += radix_logs[radix_logs.size() - 1 - i]; 201 | } 202 | 203 | // Compute m_sum_of_radixes lookup table. 204 | m_sum_of_radixes = std::vector(radix_logs.size() + 1); 205 | m_level_ptr = std::vector(radix_logs.size()); 206 | std::uint64_t sum_of_radixes = 0; 207 | for (std::uint64_t i = 0; i < radix_logs.size(); ++i) { 208 | m_sum_of_radixes[i] = sum_of_radixes - i; 209 | m_level_ptr[i] = m_sum_of_radixes[i] + 1; 210 | sum_of_radixes += (1UL << radix_logs[radix_logs.size() - 1 - i]); 211 | } 212 | m_sum_of_radixes[radix_logs.size()] = sum_of_radixes - radix_logs.size(); 213 | 214 | m_size = 0; 215 | m_key_lower_bound = 0; 216 | m_bottom_level_queue_ptr = 0; 217 | m_min_compare_ptr = 0; 218 | m_bottom_level_radix = (1UL << radix_logs.back()); 219 | 220 | m_queue_count = sum_of_radixes - (radix_logs.size() - 1); 221 | m_queue_min = std::vector(m_queue_count, 222 | std::numeric_limits::max()); 223 | std::uint64_t n_pages = max_items / m_pagesize + 224 | (std::uint64_t)2 * m_queue_count; 225 | 226 | m_pages_mem = utils::allocate_array(n_pages * m_pagesize); 227 | m_pages_next = utils::allocate_array(n_pages); 228 | m_queue_headers = utils::allocate_array(m_queue_count); 229 | m_empty_pages_list_head = 0; 230 | 231 | for (std::uint64_t i = 0; i < m_queue_count; ++i) { 232 | queue_header &h = m_queue_headers[i]; 233 | h.m_tail_page_id = std::numeric_limits::max(); 234 | h.m_head_page_id = std::numeric_limits::max(); 235 | } 236 | 237 | for (std::uint64_t i = 0; i < n_pages; ++i) { 238 | if (i + 1 != n_pages) m_pages_next[i] = i + 1; 239 | else m_pages_next[i] = std::numeric_limits::max(); 240 | } 241 | } 242 | 243 | private: 244 | inline std::uint64_t get_queue_id(key_type key) const { 245 | std::uint64_t x = (std::uint64_t)key; 246 | if (x == m_key_lower_bound) 247 | return (x & (m_bottom_level_radix - 
1)); 248 | 249 | std::uint64_t level_id = m_bin_len_to_level_id[64 - __builtin_clzll(x ^ m_key_lower_bound)]; 250 | std::uint64_t bucket_id = (x >> m_sum_of_radix_logs[level_id]) & m_level_mask[level_id]; 251 | std::uint64_t queue_id = m_sum_of_radixes[level_id] + bucket_id; 252 | 253 | return queue_id; 254 | } 255 | 256 | public: 257 | inline void push(key_type key, value_type value) { 258 | ++m_size; 259 | std::uint64_t id = get_queue_id(key); 260 | internal_queue_push(id, pair_type(key, value)); 261 | m_queue_min[id] = std::min(m_queue_min[id], (std::uint64_t)key); 262 | m_min_compare_ptr = std::min(m_min_compare_ptr, id); 263 | } 264 | 265 | // Return true iff x <= key, where x is the 266 | // smallest element currently stored in the heap. 267 | inline bool min_compare(key_type key) { 268 | if (empty()) return false; 269 | if (!is_internal_queue_empty(m_min_compare_ptr)) 270 | return (m_queue_min[m_min_compare_ptr] <= (std::uint64_t)key); 271 | std::uint64_t id = get_queue_id(key); 272 | while (m_min_compare_ptr != id && is_internal_queue_empty(m_min_compare_ptr)) 273 | ++m_min_compare_ptr; 274 | return (!is_internal_queue_empty(m_min_compare_ptr) && 275 | m_queue_min[m_min_compare_ptr] <= (std::uint64_t)key); 276 | } 277 | 278 | // Remove and return the item with the smallest key. 
279 | inline std::pair extract_min() { 280 | if (is_internal_queue_empty(m_bottom_level_queue_ptr)) 281 | redistribute(); 282 | pair_type p = internal_queue_front(m_bottom_level_queue_ptr); 283 | internal_queue_pop(m_bottom_level_queue_ptr); 284 | key_type key = p.first; 285 | value_type value = p.second; 286 | --m_size; 287 | return std::make_pair(key, value); 288 | } 289 | 290 | inline std::uint64_t size() const { 291 | return m_size; 292 | } 293 | 294 | inline bool empty() const { 295 | return m_size == 0; 296 | } 297 | 298 | ~radix_heap() { 299 | utils::deallocate(m_pages_mem); 300 | utils::deallocate(m_pages_next); 301 | utils::deallocate(m_queue_headers); 302 | } 303 | 304 | private: 305 | void redistribute() { 306 | while (m_bottom_level_queue_ptr < m_bottom_level_radix && is_internal_queue_empty(m_bottom_level_queue_ptr)) 307 | m_queue_min[m_bottom_level_queue_ptr++] = std::numeric_limits::max(); 308 | 309 | if (m_bottom_level_queue_ptr < m_bottom_level_radix) { 310 | m_key_lower_bound = m_queue_min[m_bottom_level_queue_ptr]; 311 | } else { 312 | // Find the non-empty queue with the smallest id. 313 | std::uint64_t level = 1; 314 | while (true) { 315 | // Scan current level. 316 | while (m_level_ptr[level] < m_sum_of_radixes[level + 1] + 1 && 317 | is_internal_queue_empty(m_level_ptr[level])) 318 | ++m_level_ptr[level]; 319 | 320 | // If not found, reset the level pointer 321 | // and move up. Otherwise break. 322 | if (m_level_ptr[level] == m_sum_of_radixes[level + 1] + 1) { 323 | m_level_ptr[level] = m_sum_of_radixes[level] + 1; 324 | ++level; 325 | } else break; 326 | } 327 | 328 | std::uint64_t id = m_level_ptr[level]; 329 | m_key_lower_bound = m_queue_min[id]; 330 | 331 | // Redistribute elements in internal queue. 
332 | while (!is_internal_queue_empty(id)) { 333 | pair_type p = internal_queue_front(id); 334 | internal_queue_pop(id); 335 | std::uint64_t newid = get_queue_id(p.first); 336 | internal_queue_push(newid, p); 337 | m_queue_min[newid] = std::min(m_queue_min[newid], (std::uint64_t)p.first); 338 | } 339 | m_bottom_level_queue_ptr = get_queue_id(m_key_lower_bound); 340 | m_queue_min[id] = std::numeric_limits::max(); 341 | } 342 | m_min_compare_ptr = m_bottom_level_queue_ptr; 343 | } 344 | }; 345 | 346 | } // namespace fsais_private 347 | 348 | #endif // __FSAIS_SRC_RADIX_HEAP_HPP_INCLUDED 349 | -------------------------------------------------------------------------------- /src/fsais_src/utils.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file fsais_src/utils.cpp 3 | * @section LICENCE 4 | * 5 | * This file is part of fSAIS v0.1.0 6 | * See: https://github.com/dominikkempa/fsais 7 | * 8 | * Copyright (C) 2016-2020 9 | * Dominik Kempa 10 | * Juha Karkkainen 11 | * 12 | * Permission is hereby granted, free of charge, to any person 13 | * obtaining a copy of this software and associated documentation 14 | * files (the "Software"), to deal in the Software without 15 | * restriction, including without limitation the rights to use, 16 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | * copies of the Software, and to permit persons to whom the 18 | * Software is furnished to do so, subject to the following 19 | * conditions: 20 | * 21 | * The above copyright notice and this permission notice shall be 22 | * included in all copies or substantial portions of the Software. 23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 26 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. 
namespace fsais_private {
namespace utils {

std::mutex io_mutex;
std::mutex allocator_mutex;
std::uint64_t current_ram_allocation;
std::uint64_t current_io_volume;
std::uint64_t current_disk_allocation;
std::uint64_t peak_ram_allocation;
std::uint64_t peak_disk_allocation;

// Allocate `bytes` bytes and account for them in the RAM statistics.
// The requested size is stored in an 8-byte header in front of the
// returned block so that deallocate() can undo the accounting.
// Terminates the program if the memory cannot be obtained.
void *allocate(const std::uint64_t bytes) {
  std::lock_guard<std::mutex> lk(allocator_mutex);
  std::uint8_t * const ptr =
    (std::uint8_t *)std::malloc(bytes + 8);
  if (ptr == NULL) {
    std::fprintf(stderr, "\nError: failed to allocate %llu bytes.\n",
        (unsigned long long)bytes);
    std::exit(EXIT_FAILURE);
  }
  std::uint64_t * const ptr64 = (std::uint64_t *)ptr;
  *ptr64 = bytes;
  std::uint8_t * const ret = ptr + 8;
  current_ram_allocation += bytes;
  peak_ram_allocation =
    std::max(peak_ram_allocation,
        current_ram_allocation);
  return (void *)ret;
}

// Allocate `bytes` bytes at an address that is a multiple of `align`
// (align must be positive). The block obtained from allocate() is
// over-allocated by align - 1 + 8 bytes; the 8 bytes in front of the
// returned address hold the pointer that aligned_deallocate() hands
// back to deallocate().
void *aligned_allocate(
    const std::uint64_t bytes,
    const std::uint64_t align) {
  std::uint8_t * const ptr =
    (std::uint8_t *)allocate(bytes + (align - 1) + 8);
  std::uint8_t *ptr2 = ptr + 8;
  const std::uint64_t n_blocks =
    ((std::uint64_t)ptr2 + align - 1) / align;
  ptr2 = (std::uint8_t *)(n_blocks * align);
  std::uint64_t * const ptr64 = (std::uint64_t *)(ptr2 - 8);
  *ptr64 = (std::uint64_t)ptr;
  return (void *)ptr2;
}

// Release a block obtained from allocate() and update the RAM
// statistics. A null pointer is ignored.
void deallocate(const void * const tab) {
  if (tab == NULL)
    return;
  std::lock_guard<std::mutex> lk(allocator_mutex);
  std::uint8_t * const ptr = (std::uint8_t *)tab - 8;
  const std::uint64_t * const ptr64 = (std::uint64_t *)ptr;
  const std::uint64_t bytes = *ptr64;
  current_ram_allocation -= bytes;
  std::free(ptr);
}

// Release a block obtained from aligned_allocate(). A null pointer
// is ignored.
void aligned_deallocate(const void * const tab) {
  if (tab == NULL)
    return;
  const std::uint8_t * const ptr = (std::uint8_t *)tab;
  const std::uint64_t * const ptr64 = (std::uint64_t *)(ptr - 8);
  deallocate((void *)(*ptr64));
}

// Reset all RAM / disk / I/O counters to zero.
void initialize_stats() {
  current_ram_allocation = 0;
  current_disk_allocation = 0;
  current_io_volume = 0;
  peak_ram_allocation = 0;
  peak_disk_allocation = 0;
}

std::uint64_t get_current_ram_allocation() {
  return current_ram_allocation;
}

std::uint64_t get_peak_ram_allocation() {
  return peak_ram_allocation;
}

std::uint64_t get_current_io_volume() {
  return current_io_volume;
}

std::uint64_t get_current_disk_allocation() {
  return current_disk_allocation;
}

std::uint64_t get_peak_disk_allocation() {
  return peak_disk_allocation;
}

// Wall-clock time in seconds, as reported by gettimeofday().
long double wclock() {
  timeval tim;
  gettimeofday(&tim, NULL);
  return tim.tv_sec + (tim.tv_usec / 1000000.0L);
}

// Return after at least `duration_sec` seconds of wall-clock time.
// Note: this polls wclock() in a loop, i.e., the calling thread
// stays busy for the whole duration.
void sleep(const long double duration_sec) {
  const long double timestamp = wclock();
  while (wclock() - timestamp < duration_sec);
}

// Open a file with std::fopen; on failure report the error and
// terminate the program.
std::FILE *file_open(
    const std::string filename,
    const std::string mode) {
  std::FILE * const f =
    std::fopen(filename.c_str(), mode.c_str());
  if (f == NULL) {
    std::perror(filename.c_str());
    std::exit(EXIT_FAILURE);
  }
  return f;
}

// Same as file_open(), but additionally switches the stream to
// unbuffered mode (_IONBF).
std::FILE *file_open_nobuf(
    const std::string filename,
    const std::string mode) {
  std::FILE * const f =
    std::fopen(filename.c_str(), mode.c_str());
  if (f == NULL) {
    std::perror(filename.c_str());
    std::exit(EXIT_FAILURE);
  }
  if(std::setvbuf(f, NULL, _IONBF, 0) != 0) {
    perror("setvbuf failed");
    std::exit(EXIT_FAILURE);
  }
  return f;
}

// Return the size of the file in bytes. Terminates the program if
// the file cannot be opened or its size cannot be determined.
std::uint64_t file_size(const std::string filename) {
  std::FILE * const f = file_open_nobuf(filename, "r");
  if (std::fseek(f, 0, SEEK_END) != 0) {
    std::perror(filename.c_str());
    std::exit(EXIT_FAILURE);
  }
  const long size = std::ftell(f);
  if (size < 0) {
    std::perror(filename.c_str());
    std::exit(EXIT_FAILURE);
  }
  std::fclose(f);
  return (std::uint64_t)size;
}

// True iff the file can be opened for reading.
bool file_exists(const std::string filename) {
  std::FILE * const f = std::fopen(filename.c_str(), "r");
  const bool result = (f != NULL);
  if (f != NULL)
    std::fclose(f);
  return result;
}

// Remove the file; terminates the program on failure. With
// MONITOR_DISK_USAGE defined the file size is first subtracted from
// the disk allocation counter.
void file_delete(const std::string filename) {

#ifdef MONITOR_DISK_USAGE
  std::lock_guard<std::mutex> lk(io_mutex);
  current_disk_allocation -= file_size(filename);
#endif

  const int res = std::remove(filename.c_str());
  if (res != 0) {
    std::perror(filename.c_str());
    std::exit(EXIT_FAILURE);
  }
}

// Resolve `filename` with realpath(). realpath() is given an
// existing file: if the file does not exist, it is created for the
// duration of the call and deleted afterwards.
std::string absolute_path(std::string filename) {
  char path[1 << 12];
  bool created = false;
  if (!file_exists(filename)) {
    std::fclose(file_open(filename, "w"));
    created = true;
  }
  if (!realpath(filename.c_str(), path)) {
    std::perror(filename.c_str());
    std::exit(EXIT_FAILURE);
  }
  if (created)
    file_delete(filename);
  return std::string(path);
}

// Advise the kernel (POSIX_FADV_DONTNEED) that the cached pages of
// the file are no longer needed.
void empty_page_cache(const std::string filename) {
  const int fd = open(filename.c_str(), O_RDWR);
  if (fd == -1) {
    std::perror(filename.c_str());
    std::exit(EXIT_FAILURE);
  }
  const off_t length = lseek(fd, 0, SEEK_END);
  lseek(fd, 0L, SEEK_SET);
  posix_fadvise(fd, 0, length, POSIX_FADV_DONTNEED);
  close(fd);
}

// Current time formatted by std::ctime().
std::string get_timestamp() {
  const std::time_t result = std::time(NULL);
  return std::string(std::ctime(&result));
}

// Pseudo-random integer from [p..r], drawn with rand().
std::int32_t random_int32(
    const std::int32_t p,
    const std::int32_t r) {
  return p + rand() % (r - p + 1);
}

// Pseudo-random integer from [p..r], assembled from several calls
// to rand().
std::int64_t random_int64(
    const std::int64_t p,
    const std::int64_t r) {

  // The products are formed in 64-bit unsigned arithmetic: with a
  // 31-bit RAND_MAX, RAND_MAX * rand() overflows int.
  const std::uint64_t r30 = (std::uint64_t)RAND_MAX * rand() + rand();
  const std::uint64_t s30 = (std::uint64_t)RAND_MAX * rand() + rand();
  const std::uint64_t t4 = rand() & 0xf;
  const std::uint64_t r64 = (r30 << 34) + (s30 << 4) + t4;

  // Size of the range, modulo 2^64. It is zero only when [p..r]
  // covers all 64-bit values, in which case every r64 is valid.
  const std::uint64_t range =
    (std::uint64_t)r - (std::uint64_t)p + 1;
  if (range == 0)
    return (std::int64_t)r64;
  return (std::int64_t)((std::uint64_t)p + r64 % range);
}

// Fill s[0..length) with pseudo-random symbols from [0..sigma).
void fill_random_string(
    std::uint8_t * const &s,
    const std::uint64_t length,
    const std::uint64_t sigma) {
  for (std::uint64_t i = 0; i < length; ++i)
    s[i] = random_int32(0, sigma - 1);
}

// Fill s[0..length) with pseudo-random symbols from
// ['a'..'a' + sigma).
void fill_random_letters(
    std::uint8_t * const &s,
    const std::uint64_t length,
    const std::uint64_t sigma) {
  fill_random_string(s, length, sigma);
  for (std::uint64_t i = 0; i < length; ++i)
    s[i] += 'a';
}

// Decimal representation of a pseudo-random 64-bit number.
std::string random_string_hash() {
  const uint64_t hash =
    (uint64_t)rand() * RAND_MAX + rand();
  std::stringstream ss;
  ss << hash;
  return ss.str();
}

// Smallest w such that 2^w >= x (0 for x <= 1). Returns 64 for
// x > 2^63.
std::uint64_t log2ceil(const std::uint64_t x) {
  std::uint64_t w = 0;

  // w is capped at 64 so that the shift stays in range; comparing
  // against a doubling power of two would wrap to zero and never
  // terminate for x > 2^63.
  while (w < 64 && ((std::uint64_t)1 << w) < x)
    ++w;
  return w;
}

// Largest w such that 2^w <= x (0 for x <= 1).
std::uint64_t log2floor(const std::uint64_t x) {
  std::uint64_t w = 0;

  // Shifting x right instead of a power of two left cannot
  // overflow, so the loop also terminates for x >= 2^63.
  while (w < 63 && (x >> (w + 1)) != 0)
    ++w;
  return w;
}

}  // namespace utils
}  // namespace fsais_private
This file is part of fSAIS v0.1.0 6 | * See: https://github.com/dominikkempa/fsais 7 | * 8 | * Copyright (C) 2016-2020 9 | * Dominik Kempa 10 | * Juha Karkkainen 11 | * 12 | * Permission is hereby granted, free of charge, to any person 13 | * obtaining a copy of this software and associated documentation 14 | * files (the "Software"), to deal in the Software without 15 | * restriction, including without limitation the rights to use, 16 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | * copies of the Software, and to permit persons to whom the 18 | * Software is furnished to do so, subject to the following 19 | * conditions: 20 | * 21 | * The above copyright notice and this permission notice shall be 22 | * included in all copies or substantial portions of the Software. 23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 26 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 28 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 29 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 31 | * OTHER DEALINGS IN THE SOFTWARE. 
// src/fsais_src/utils.hpp
// Timing, allocation, statistics, file-I/O and small arithmetic helpers
// shared by the fSAIS sources.

#ifndef __FSAIS_SRC_UTILS_HPP_INCLUDED
#define __FSAIS_SRC_UTILS_HPP_INCLUDED

#include <cstdio>
#include <cstdlib>
#include <cstdint>
#include <climits>
#include <string>
#include <sstream>
#include <algorithm>
#include <mutex>


namespace fsais_private {
namespace utils {

// Resource-usage counters and the mutex serializing their updates.
// Within this header they are only touched when MONITOR_DISK_USAGE is
// defined. The objects themselves are defined outside this header.
extern std::uint64_t current_ram_allocation;
extern std::uint64_t current_io_volume;
extern std::uint64_t current_disk_allocation;
extern std::uint64_t peak_ram_allocation;
extern std::uint64_t peak_disk_allocation;
extern std::mutex io_mutex;

// Timing helpers (defined outside this header).
long double wclock();
void sleep(const long double);

// Raw memory management (defined outside this header).
void *allocate(const std::uint64_t);
void *aligned_allocate(const std::uint64_t, const std::uint64_t);
void deallocate(const void * const);
void aligned_deallocate(const void * const);

// Access to the usage statistics (defined outside this header).
void initialize_stats();
std::uint64_t get_current_ram_allocation();
std::uint64_t get_peak_ram_allocation();
std::uint64_t get_current_io_volume();
std::uint64_t get_current_disk_allocation();
std::uint64_t get_peak_disk_allocation();

// Allocates room for `size` objects of type value_type via allocate().
// The returned memory is not constructed/initialized here.
template<typename value_type>
value_type *allocate_array(const std::uint64_t size) {
  return static_cast<value_type *>(allocate(size * sizeof(value_type)));
}

// Same as allocate_array(), but forwards `align` to aligned_allocate().
template<typename value_type>
value_type *aligned_allocate_array(
    const std::uint64_t size,
    const std::uint64_t align) {
  return static_cast<value_type *>(
      aligned_allocate(size * sizeof(value_type), align));
}

// File helpers (defined outside this header).
std::FILE *file_open(const std::string, const std::string);
std::FILE *file_open_nobuf(const std::string, const std::string);
std::uint64_t file_size(const std::string);
bool file_exists(const std::string);
void file_delete(const std::string);
std::string absolute_path(const std::string);
void empty_page_cache(const std::string);
std::string get_timestamp();

// Moves the position of `f` to `byte_offset` bytes from the beginning
// of the file. Terminates the program if the offset does not fit in
// the `long` accepted by std::fseek, or if the seek itself fails.
inline void checked_fseek(
    std::FILE * const f,
    const std::uint64_t byte_offset) {
  if (byte_offset > static_cast<std::uint64_t>(LONG_MAX) ||
      std::fseek(f, static_cast<long>(byte_offset), SEEK_SET) != 0) {
    std::fprintf(stderr, "\nError: fseek failed.\n");
    std::exit(EXIT_FAILURE);
  }
}

// Writes `length` items starting at `src` to `f` at its current
// position. With MONITOR_DISK_USAGE the written bytes are added to
// both the I/O volume and the disk allocation (i.e., the write is
// accounted as growing the file). Terminates the program on a short
// write.
template<typename value_type>
void write_to_file(
    const value_type * const src,
    const std::uint64_t length,
    std::FILE * const f) {

#ifdef MONITOR_DISK_USAGE
  std::lock_guard<std::mutex> lk(io_mutex);
#endif

  const std::uint64_t fwrite_ret =
    std::fwrite(src, sizeof(value_type), length, f);

#ifdef MONITOR_DISK_USAGE
  current_io_volume += sizeof(value_type) * length;
  current_disk_allocation += sizeof(value_type) * length;
  peak_disk_allocation =
    std::max(peak_disk_allocation, current_disk_allocation);
#endif

  if (fwrite_ret != length) {
    std::fprintf(stderr, "\nError: fwrite failed.\n");
    std::exit(EXIT_FAILURE);
  }
}

// Like write_to_file(src, length, f), except that with
// MONITOR_DISK_USAGE only the I/O volume is increased; the disk
// allocation counters are left unchanged.
template<typename value_type>
void write_to_file_inplace(
    const value_type * const src,
    const std::uint64_t length,
    std::FILE * const f) {

#ifdef MONITOR_DISK_USAGE
  std::lock_guard<std::mutex> lk(io_mutex);
#endif

  const std::uint64_t fwrite_ret =
    std::fwrite(src, sizeof(value_type), length, f);

#ifdef MONITOR_DISK_USAGE
  current_io_volume += sizeof(value_type) * length;
#endif

  if (fwrite_ret != length) {
    std::fprintf(stderr, "\nError: fwrite failed.\n");
    std::exit(EXIT_FAILURE);
  }
}

// Opens `filename` with mode "w" via file_open_nobuf(), writes
// `length` items from `src` and closes the file. A failing fclose is
// treated as a write error, since data may not have reached the file.
template<typename value_type>
void write_to_file(
    const value_type * const src,
    const std::uint64_t length,
    const std::string filename) {
  std::FILE * const f = file_open_nobuf(filename, "w");
  write_to_file(src, length, f);
  if (std::fclose(f) != 0) {
    std::fprintf(stderr, "\nError: fclose failed.\n");
    std::exit(EXIT_FAILURE);
  }
}

// Overwrites `length` items of `f` starting at item index `offset`
// (the byte position is sizeof(value_type) * offset).
template<typename value_type>
void overwrite_at_offset(
    const value_type * const src,
    const std::uint64_t offset,
    const std::uint64_t length,
    std::FILE * const f) {
  checked_fseek(f, sizeof(value_type) * offset);
  write_to_file_inplace(src, length, f);
}

// Reads exactly `length` items from `f` into `dest`. Terminates the
// program if fewer items could be read.
template<typename value_type>
void read_from_file(
    value_type * const dest,
    const std::uint64_t length,
    std::FILE * const f) {

#ifdef MONITOR_DISK_USAGE
  std::lock_guard<std::mutex> lk(io_mutex);
#endif

  const std::uint64_t fread_ret =
    std::fread(dest, sizeof(value_type), length, f);

#ifdef MONITOR_DISK_USAGE
  current_io_volume += sizeof(value_type) * length;
#endif

  if (fread_ret != length) {
    std::fprintf(stderr, "\nError: fread failed.\n");
    std::exit(EXIT_FAILURE);
  }
}

// Opens `filename` with mode "r" via file_open_nobuf(), reads exactly
// `length` items into `dest` and closes the file.
template<typename value_type>
void read_from_file(
    value_type * const dest,
    const std::uint64_t length,
    const std::string filename) {
  std::FILE * const f = file_open_nobuf(filename, "r");
  read_from_file(dest, length, f);
  std::fclose(f);
}

// Reads up to `max_items` items from `f` into `dest` and stores the
// number of items actually read in `items_read`. A short read is not
// an error here; only a stream error terminates the program.
template<typename value_type>
void read_from_file(
    value_type * const dest,
    const std::uint64_t max_items,
    std::uint64_t &items_read,
    std::FILE * const f) {

#ifdef MONITOR_DISK_USAGE
  std::lock_guard<std::mutex> lk(io_mutex);
#endif

  items_read = std::fread(dest, sizeof(value_type), max_items, f);

#ifdef MONITOR_DISK_USAGE
  current_io_volume += sizeof(value_type) * items_read;
#endif

  if (std::ferror(f)) {
    std::fprintf(stderr, "\nError: fread failed.\n");
    std::exit(EXIT_FAILURE);
  }
}

// Reads exactly `length` items from `f`, starting at BYTE position
// `offset`.
// NOTE(review): unlike overwrite_at_offset(), `offset` is not scaled
// by sizeof(value_type) here. The asymmetry is kept as-is because
// callers may rely on it -- confirm that it is intended.
template<typename value_type>
void read_at_offset(
    value_type * const dest,
    const std::uint64_t offset,
    const std::uint64_t length,
    std::FILE * const f) {
  checked_fseek(f, offset);
  read_from_file(dest, length, f);
}

// Opens `filename` with mode "r" via file_open_nobuf(), reads exactly
// `length` items starting at byte position `offset` and closes the
// file.
template<typename value_type>
void read_at_offset(
    value_type * const dest,
    const std::uint64_t offset,
    const std::uint64_t length,
    const std::string filename) {
  std::FILE * const f = file_open_nobuf(filename, "r");
  read_at_offset(dest, offset, length, f);
  std::fclose(f);
}

// Random data helpers (defined outside this header).
std::int32_t random_int32(const std::int32_t, const std::int32_t);
std::int64_t random_int64(const std::int64_t, const std::int64_t);
void fill_random_string(std::uint8_t * const &,
    const std::uint64_t, const std::uint64_t);
void fill_random_letters(std::uint8_t * const &,
    const std::uint64_t, const std::uint64_t);
std::string random_string_hash();

// Integer logarithms (defined outside this header).
std::uint64_t log2ceil(const std::uint64_t);
std::uint64_t log2floor(const std::uint64_t);

// Formats `x` with operator<< and returns the resulting text.
template<typename int_type>
std::string intToStr(const int_type x) {
  std::stringstream ss;
  ss << x;
  return ss.str();
}

// Greatest common divisor (Euclid's algorithm). gcd(a, 0) == a, hence
// gcd(0, 0) == 0.
template<typename int_type>
int_type gcd(
    const int_type a,
    const int_type b) {
  int_type x = a;
  int_type y = b;
  while (y != (int_type)0) {
    const int_type rem = x % y;
    x = y;
    y = rem;
  }
  return x;
}

// Least common multiple. Returns 0 when both arguments are 0 (the
// only case in which the gcd is 0) instead of dividing by zero.
template<typename int_type>
int_type lcm(
    const int_type a,
    const int_type b) {
  const int_type divisor = gcd(a, b);
  if (divisor == (int_type)0)
    return (int_type)0;
  return (a / divisor) * b;
}

// Returns a block size, measured in items of value_type, for a buffer
// of at most `ram_budget` bytes. If the budget holds at least one
// "optimal" block of lcm(BUFSIZ, sizeof(value_type)) bytes, the result
// covers the largest whole number of such blocks; otherwise it is the
// number of items that fit in the budget, but never less than 1.
template<typename value_type>
std::uint64_t disk_block_size(const std::uint64_t ram_budget) {
  const std::uint64_t opt_block_size = lcm(
      static_cast<std::uint64_t>(BUFSIZ),
      static_cast<std::uint64_t>(sizeof(value_type)));

  if (ram_budget < opt_block_size) {
    return std::max<std::uint64_t>(1, ram_budget / sizeof(value_type));
  }

  const std::uint64_t opt_block_count = ram_budget / opt_block_size;
  const std::uint64_t opt_blocks_bytes = opt_block_count * opt_block_size;
  return opt_blocks_bytes / sizeof(value_type);
}

}  // namespace utils
}  // namespace fsais_private

#endif  // __FSAIS_SRC_UTILS_HPP_INCLUDED
software and associated documentation 14 | * files (the "Software"), to deal in the Software without 15 | * restriction, including without limitation the rights to use, 16 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | * copies of the Software, and to permit persons to whom the 18 | * Software is furnished to do so, subject to the following 19 | * conditions: 20 | * 21 | * The above copyright notice and this permission notice shall be 22 | * included in all copies or substantial portions of the Software. 23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 26 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 28 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 29 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 31 | * OTHER DEALINGS IN THE SOFTWARE. 32 | **/ 33 | 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | 44 | #include "fsais_src/em_compute_sa.hpp" 45 | #include "uint40.hpp" 46 | 47 | 48 | char *program_name; 49 | 50 | void usage(int status) { 51 | printf( 52 | 53 | "Usage: %s [OPTION]... FILE\n" 54 | "Construct the suffix array of text stored in FILE.\n" 55 | "\n" 56 | "Mandatory arguments to long options are mandatory for short options too.\n" 57 | " -h, --help display this help and exit\n" 58 | " -m, --mem=MEM use MEM bytes of RAM for computation. Metric and IEC\n" 59 | " suffixes are recognized, e.g., -l 10k, -l 1Mi, -l 3G\n" 60 | " gives MEM = 10^4, 2^20, 3*10^6. Default: 3584Mi\n" 61 | " -o, --output=OUTFILE specify output filename. 
Default: FILE.saX, where\n" 62 | " X = integer size used to encode the suffix array\n" 63 | " (5 bytes by default)\n", 64 | 65 | program_name); 66 | 67 | std::exit(status); 68 | } 69 | 70 | bool file_exists(std::string filename) { 71 | std::FILE *f = std::fopen(filename.c_str(), "r"); 72 | bool ret = (f != NULL); 73 | if (f != NULL) std::fclose(f); 74 | 75 | return ret; 76 | } 77 | 78 | template 79 | std::string intToStr(int_type x) { 80 | std::stringstream ss; 81 | ss << x; 82 | return ss.str(); 83 | } 84 | 85 | template 86 | bool parse_number(char *str, int_type *ret) { 87 | *ret = 0; 88 | std::uint64_t n_digits = 0; 89 | std::uint64_t str_len = std::strlen(str); 90 | while (n_digits < str_len && std::isdigit(str[n_digits])) { 91 | std::uint64_t digit = str[n_digits] - '0'; 92 | *ret = (*ret) * 10 + digit; 93 | ++n_digits; 94 | } 95 | 96 | if (n_digits == 0) 97 | return false; 98 | 99 | std::uint64_t suffix_length = str_len - n_digits; 100 | if (suffix_length > 0) { 101 | if (suffix_length > 2) 102 | return false; 103 | 104 | for (std::uint64_t j = 0; j < suffix_length; ++j) 105 | str[n_digits + j] = std::tolower(str[n_digits + j]); 106 | if (suffix_length == 2 && str[n_digits + 1] != 'i') 107 | return false; 108 | 109 | switch(str[n_digits]) { 110 | case 'k': 111 | if (suffix_length == 1) 112 | *ret *= 1000; 113 | else 114 | *ret <<= 10; 115 | break; 116 | case 'm': 117 | if (suffix_length == 1) 118 | *ret *= 1000000; 119 | else 120 | *ret <<= 20; 121 | break; 122 | case 'g': 123 | if (suffix_length == 1) 124 | *ret *= 1000000000; 125 | else 126 | *ret <<= 30; 127 | break; 128 | case 't': 129 | if (suffix_length == 1) 130 | *ret *= 1000000000000; 131 | else 132 | *ret <<= 40; 133 | break; 134 | default: 135 | return false; 136 | } 137 | } 138 | 139 | return true; 140 | } 141 | 142 | int main(int argc, char **argv) { 143 | srand(time(0) + getpid()); 144 | program_name = argv[0]; 145 | 146 | static struct option long_options[] = { 147 | {"help", no_argument, NULL, 
'h'}, 148 | {"mem", required_argument, NULL, 'm'}, 149 | {"output", required_argument, NULL, 'o'}, 150 | {NULL, 0, NULL, 0} 151 | }; 152 | 153 | std::uint64_t ram_use = ((std::uint64_t)3584 << 20); 154 | std::string output_filename(""); 155 | 156 | // Parse command-line options. 157 | int c; 158 | while ((c = getopt_long(argc, argv, "hm:o:", 159 | long_options, NULL)) != -1) { 160 | switch(c) { 161 | case 'h': 162 | usage(EXIT_FAILURE); 163 | break; 164 | case 'm': 165 | { 166 | bool ok = parse_number(optarg, &ram_use); 167 | if (!ok) { 168 | fprintf(stderr, "Error: parsing RAM " 169 | "limit (%s) failed\n\n", optarg); 170 | usage(EXIT_FAILURE); 171 | } 172 | if (ram_use == 0) { 173 | fprintf(stderr, "Error: invalid RAM limit (%lu)\n\n", ram_use); 174 | usage(EXIT_FAILURE); 175 | } 176 | break; 177 | } 178 | case 'o': 179 | output_filename = std::string(optarg); 180 | break; 181 | default: 182 | usage(EXIT_FAILURE); 183 | break; 184 | } 185 | } 186 | 187 | if (optind >= argc) { 188 | fprintf(stderr, "Error: FILE not provided\n\n"); 189 | usage(EXIT_FAILURE); 190 | } 191 | 192 | // Parse the text filename. 193 | std::string text_filename = std::string(argv[optind++]); 194 | if (optind < argc) { 195 | fprintf(stderr, "Warning: multiple input files provided. " 196 | "Only the first will be processed.\n"); 197 | } 198 | 199 | // TODO: eliminate hardcoded values/types. 200 | typedef std::uint8_t/*std::uint32_t*/ char_type; 201 | typedef uint40 text_offset_type; 202 | std::uint64_t text_alphabet_size = 256/*100000000*/; 203 | 204 | // Set default output filename (if not provided). 205 | if (output_filename.empty()) 206 | output_filename = text_filename + ".sa" + 207 | intToStr(sizeof(text_offset_type)); 208 | 209 | // Check for the existence of text. 
210 | if (!file_exists(text_filename)) { 211 | fprintf(stderr, "Error: input file (%s) does not exist\n\n", 212 | text_filename.c_str()); 213 | usage(EXIT_FAILURE); 214 | } 215 | 216 | if (file_exists(output_filename)) { 217 | 218 | // Output file exists, should we proceed? 219 | char *line = NULL; 220 | std::uint64_t buflen = 0; 221 | std::int64_t len = 0L; 222 | 223 | do { 224 | printf("Output file (%s) exists. Overwrite? [y/n]: ", 225 | output_filename.c_str()); 226 | if ((len = getline(&line, &buflen, stdin)) == -1) { 227 | printf("\nError: failed to read answer\n\n"); 228 | std::fflush(stdout); 229 | usage(EXIT_FAILURE); 230 | } 231 | } while (len != 2 || (line[0] != 'y' && line[0] != 'n')); 232 | 233 | if (line[0] == 'n') { 234 | free(line); 235 | std::exit(EXIT_FAILURE); 236 | } 237 | free(line); 238 | } 239 | 240 | fsais_private::em_compute_sa< 241 | char_type, 242 | text_offset_type>( 243 | ram_use, 244 | text_alphabet_size, 245 | text_filename, 246 | output_filename); 247 | } 248 | -------------------------------------------------------------------------------- /src/uint24.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file uint24.hpp 3 | * @section LICENCE 4 | * 5 | * This file is part of fSAIS v0.1.0 6 | * See: https://github.com/dominikkempa/fsais 7 | * 8 | * Copyright (C) 2016-2020 9 | * Dominik Kempa 10 | * Juha Karkkainen 11 | * 12 | * Permission is hereby granted, free of charge, to any person 13 | * obtaining a copy of this software and associated documentation 14 | * files (the "Software"), to deal in the Software without 15 | * restriction, including without limitation the rights to use, 16 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | * copies of the Software, and to permit persons to whom the 18 | * Software is furnished to do so, subject to the following 19 | * conditions: 20 | * 21 | * The above copyright notice and this permission notice shall be 22 | * 
// src/uint24.hpp
// 24-bit unsigned integer stored in three bytes.

#ifndef __UINT24_HPP_INCLUDED
#define __UINT24_HPP_INCLUDED

#include <cstdint>
#include <limits>
#include <type_traits>


// Packed unsigned integer: `low` holds bits 0..15 and `high` holds
// bits 16..23. Wider values are reduced modulo 2^24 on construction.
// Copying and destruction are left to the compiler so that the type
// stays trivially copyable, which matters when objects are moved
// around as raw bytes.
class uint24 {
  private:
    std::uint16_t low;
    std::uint8_t high;

  public:
    // Leaves the value uninitialized.
    uint24() {}

    // Builds the value from its two parts.
    uint24(std::uint16_t l, std::uint8_t h) : low(l), high(h) {}

    // Conversions from built-in integers. Signed arguments are first
    // converted to the unsigned type of the same width, so that no
    // negative value is ever shifted.
    uint24(const std::int32_t& a) :
      low(static_cast<std::uint32_t>(a) & 0xFFFF),
      high((static_cast<std::uint32_t>(a) >> 16) & 0xFF) {}
    uint24(const std::uint32_t& a) :
      low(a & 0xFFFF), high((a >> 16) & 0xFF) {}
    uint24(const std::uint64_t& a) :
      low(a & 0xFFFF), high((a >> 16) & 0xFF) {}
    uint24(const std::int64_t& a) :
      low(static_cast<std::uint64_t>(a) & 0xFFFF),
      high((static_cast<std::uint64_t>(a) >> 16) & 0xFF) {}

    // Widens the stored value to 64 bits.
    inline operator std::uint64_t() const {
      return (((std::uint64_t)high) << 16) | (std::uint64_t)low; }
    inline bool operator == (const uint24& b) const {
      return (low == b.low) && (high == b.high); }
    inline bool operator != (const uint24& b) const {
      return (low != b.low) || (high != b.high); }
} __attribute__((packed));

namespace std {

// NOTE(review): the C++ standard does not sanction user
// specializations of <type_traits> templates. This one is kept because
// other code presumably queries std::is_unsigned<uint24> -- confirm.
// Deriving from true_type supplies the complete trait interface.
template<>
struct is_unsigned<uint24> : public std::true_type {};

// Limits of uint24: an exact, bounded, unsigned 24-bit integer.
template<>
class numeric_limits<uint24> {
  public:
    static const bool is_specialized = true;
    static const bool is_signed = false;
    static const bool is_integer = true;
    static const bool is_exact = true;
    static const bool is_bounded = true;
    static const int radix = 2;
    static const int digits = 24;
    static const int digits10 = 7;

    static uint24 min() {
      return uint24(std::numeric_limits<std::uint16_t>::min(),
          std::numeric_limits<std::uint8_t>::min());
    }

    static uint24 max() {
      return uint24(std::numeric_limits<std::uint16_t>::max(),
          std::numeric_limits<std::uint8_t>::max());
    }

    // For an unsigned integer the lowest value equals min().
    static uint24 lowest() {
      return min();
    }
};

}  // namespace std

#endif  // __UINT24_HPP_INCLUDED
// src/uint40.hpp
// 40-bit unsigned integer stored in five bytes.

#ifndef __UINT40_HPP_INCLUDED
#define __UINT40_HPP_INCLUDED

#include <cstdint>
#include <limits>
#include <type_traits>


// Packed unsigned integer: `low` holds bits 0..31 and `high` holds
// bits 32..39. Every integer conversion reduces its argument modulo
// 2^40, so a negative value wraps the same way regardless of the width
// of the signed type it comes from. Copying and destruction are left
// to the compiler so that the type stays trivially copyable, which
// matters when objects are moved around as raw bytes.
class uint40 {
  private:
    std::uint32_t low;
    std::uint8_t high;

  public:
    // Leaves the value uninitialized.
    uint40() {}

    // Builds the value from its two parts.
    uint40(std::uint32_t l, std::uint8_t h) : low(l), high(h) {}

    // A negative 32-bit value is sign-extended, matching what the
    // 64-bit constructor yields for the same number.
    uint40(const std::int32_t& a) :
      low(static_cast<std::uint32_t>(a)), high(a < 0 ? 0xFF : 0) {}
    uint40(const std::uint32_t& a) : low(a), high(0) {}
    uint40(const std::uint64_t& a) :
      low(a & 0xFFFFFFFF), high((a >> 32) & 0xFF) {}
    // Converted to unsigned first, so no negative value is shifted.
    uint40(const std::int64_t& a) :
      low(static_cast<std::uint64_t>(a) & 0xFFFFFFFF),
      high((static_cast<std::uint64_t>(a) >> 32) & 0xFF) {}

    // Widens the stored value to 64 bits.
    inline operator std::uint64_t() const {
      return (((std::uint64_t)high) << 32) | (std::uint64_t)low; }
    inline bool operator == (const uint40& b) const {
      return (low == b.low) && (high == b.high); }
    inline bool operator != (const uint40& b) const {
      return (low != b.low) || (high != b.high); }
} __attribute__((packed));

namespace std {

// NOTE(review): the C++ standard does not sanction user
// specializations of <type_traits> templates. This one is kept because
// other code presumably queries std::is_unsigned<uint40> -- confirm.
// Deriving from true_type supplies the complete trait interface.
template<>
struct is_unsigned<uint40> : public std::true_type {};

// Limits of uint40: an exact, bounded, unsigned 40-bit integer.
template<>
class numeric_limits<uint40> {
  public:
    static const bool is_specialized = true;
    static const bool is_signed = false;
    static const bool is_integer = true;
    static const bool is_exact = true;
    static const bool is_bounded = true;
    static const int radix = 2;
    static const int digits = 40;
    static const int digits10 = 12;

    static uint40 min() {
      return uint40(std::numeric_limits<std::uint32_t>::min(),
          std::numeric_limits<std::uint8_t>::min());
    }

    static uint40 max() {
      return uint40(std::numeric_limits<std::uint32_t>::max(),
          std::numeric_limits<std::uint8_t>::max());
    }

    // For an unsigned integer the lowest value equals min().
    static uint40 lowest() {
      return min();
    }
};

}  // namespace std

#endif  // __UINT40_HPP_INCLUDED
any person 13 | * obtaining a copy of this software and associated documentation 14 | * files (the "Software"), to deal in the Software without 15 | * restriction, including without limitation the rights to use, 16 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | * copies of the Software, and to permit persons to whom the 18 | * Software is furnished to do so, subject to the following 19 | * conditions: 20 | * 21 | * The above copyright notice and this permission notice shall be 22 | * included in all copies or substantial portions of the Software. 23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 26 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 28 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 29 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 31 | * OTHER DEALINGS IN THE SOFTWARE. 
// src/uint48.hpp
// 48-bit unsigned integer stored in six bytes.

#ifndef __UINT48_HPP_INCLUDED
#define __UINT48_HPP_INCLUDED

#include <cstdint>
#include <limits>
#include <type_traits>


// Packed unsigned integer: `low` holds bits 0..31 and `high` holds
// bits 32..47. Every integer conversion reduces its argument modulo
// 2^48, so a negative value wraps the same way regardless of the width
// of the signed type it comes from. Copying and destruction are left
// to the compiler so that the type stays trivially copyable, which
// matters when objects are moved around as raw bytes.
class uint48 {
  private:
    std::uint32_t low;
    std::uint16_t high;

  public:
    // Leaves the value uninitialized.
    uint48() {}

    // Builds the value from its two parts.
    uint48(std::uint32_t l, std::uint16_t h) : low(l), high(h) {}

    // A negative 32-bit value is sign-extended, matching what the
    // 64-bit constructor yields for the same number.
    uint48(const std::int32_t& a) :
      low(static_cast<std::uint32_t>(a)), high(a < 0 ? 0xFFFF : 0) {}
    uint48(const std::uint32_t& a) : low(a), high(0) {}
    uint48(const std::uint64_t& a) :
      low(a & 0xFFFFFFFF), high((a >> 32) & 0xFFFF) {}
    // Converted to unsigned first, so no negative value is shifted.
    uint48(const std::int64_t& a) :
      low(static_cast<std::uint64_t>(a) & 0xFFFFFFFF),
      high((static_cast<std::uint64_t>(a) >> 32) & 0xFFFF) {}

    // Widens the stored value to 64 bits.
    inline operator std::uint64_t() const {
      return (((std::uint64_t)high) << 32) | (std::uint64_t)low; }
    inline bool operator == (const uint48& b) const {
      return (low == b.low) && (high == b.high); }
    inline bool operator != (const uint48& b) const {
      return (low != b.low) || (high != b.high); }
} __attribute__((packed));

namespace std {

// NOTE(review): the C++ standard does not sanction user
// specializations of <type_traits> templates. This one is kept because
// other code presumably queries std::is_unsigned<uint48> -- confirm.
// Deriving from true_type supplies the complete trait interface.
template<>
struct is_unsigned<uint48> : public std::true_type {};

// Limits of uint48: an exact, bounded, unsigned 48-bit integer.
template<>
class numeric_limits<uint48> {
  public:
    static const bool is_specialized = true;
    static const bool is_signed = false;
    static const bool is_integer = true;
    static const bool is_exact = true;
    static const bool is_bounded = true;
    static const int radix = 2;
    static const int digits = 48;
    static const int digits10 = 14;

    static uint48 min() {
      return uint48(std::numeric_limits<std::uint32_t>::min(),
          std::numeric_limits<std::uint16_t>::min());
    }

    static uint48 max() {
      return uint48(std::numeric_limits<std::uint32_t>::max(),
          std::numeric_limits<std::uint16_t>::max());
    }

    // For an unsigned integer the lowest value equals min().
    static uint48 lowest() {
      return min();
    }
};

}  // namespace std

#endif  // __UINT48_HPP_INCLUDED