├── LICENSE ├── README.md ├── brc.cpp ├── brc.hpp ├── common.hpp ├── main.cpp ├── make_avx.bat ├── make_sse2.bat └── make_std.bat /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Lucas Marsh 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Behemoth-Rank-Coding 2 | Fast and Strong Burrows Wheeler Model 3 | 4 | BRC is a very careful implementation of non-sequential move to front coding; the encoder is fully vectorized and the decoder is partially vectorized. 5 | 6 | BRC achieves compression rates on par with QLFC when paired with an order-0 entropy coder, and BRC can operate in parallel via OpenMP. 
7 | 8 | Here are some numbers from BRC on the enwik9.bwt test file; tests were run on an i7-7700HQ @ 3.5 GHz. 9 | 10 | Note: time to compress via fpaqc and FSE (32KB blocks) is not included, these are purely timings of the BRC transform itself. 11 | 12 | Implementation | Encode speed | Decode speed| Compressed size (via fpaqc)| Compressed size (via FSE) | 13 | -----------------------|--------------|-------------|---------------------------|---------------------------- 14 | BRC4_AVX 16-threads | 483 MB/s | 548 MB/s | 168,556,196 bytes | 178,988,019 bytes | 15 | BRC4_AVX 8-threads | 456 MB/s | 486 MB/s | 168,556,196 bytes | 178,988,019 bytes | 16 | BRC4_AVX 4-threads | 303 MB/s | 316 MB/s | 168,556,196 bytes | 178,988,019 bytes | 17 | BRC4_AVX 1-thread | 99 MB/s | 108 MB/s | 168,556,196 bytes | 178,988,019 bytes | 18 | -------------------------------------------------------------------------------- /brc.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | BRC - Behemoth Rank Coding for BWT 3 | MIT License 4 | Copyright (c) 2018 Lucas Marsh 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | */ 9 | #include "brc.hpp" 10 | 11 | #define BRC_VSRC_FOOTER_SIZE (sizeof(uint32_t) * 256) 12 | #define BRC_RLT_FOOTER_SIZE (1) 13 | #define BRC_PAD_SIZE (16) 14 | 15 | /*** basic utilities **/ 16 | void brc_memcopy_separate(void * dst, void * src, size_t size) { 17 | unsigned char * s = (unsigned char*)src; 18 | unsigned char * d = (unsigned char*)dst; 19 | if((((size_t)s) % 8) == 0 && (((size_t)d) % 8) == 0) { 20 | size_t i = 0; 21 | while((i + 8) < size) { 22 | *((uint64_t*)&d[i]) = *((uint64_t*)&s[i]); 23 | i += 8; 24 | } 25 | while(i < size) { 26 | d[i] = s[i]; 27 | i++; 28 | } 29 | } else { 30 | memcpy(dst, src, size); 31 | } 32 | } 33 | 34 | void * brc_aligned_malloc(size_t bytes, size_t alignment) { 35 | char * base_ptr; 36 | char * aligned_ptr; 37 | 38 | size_t pad = alignment + sizeof(size_t); 39 | if((base_ptr =(char*)malloc(bytes + pad)) == NULL) 40 | return NULL; 41 | size_t addr = (size_t)(base_ptr + pad); 42 | aligned_ptr = (char *)(addr - (addr % alignment)); 43 | *((size_t*)aligned_ptr - 1) = (size_t)base_ptr; 44 | 45 | return (void*)aligned_ptr; 46 | } 47 | 48 | void brc_aligned_free(void * aligned_ptr) { 49 | free( (char*)(*((size_t*)aligned_ptr - 1)) ); 50 | } 51 | 52 | /*** bytewise zero run length coder ***/ 53 | size_t rlt_forwards(unsigned char * src, unsigned char * dst, size_t size) { 54 | unsigned char * write_head = dst; 55 | unsigned char * write_end = write_head + size; 56 | size_t i = 0; 57 | while(i < size && write_head < write_end) { 58 | if(src[i] == 0) { 59 | size_t run = 1; 60 | while ((i + run) < size && src[i] == src[i + run]) 61 | run++; 62 | i += run; 63 | size_t L = run + 1; 64 | size_t msb = 0; 65 | asm("bsrq %1,%0" : "=r"(msb) : "r"(L)); 66 | while(msb--) 67 | 
*write_head++ = (L >> msb) & 1; 68 | } else if (src[i] >= 0xfe) { 69 | *write_head++ = 0xff; 70 | *write_head++ = src[i++] == 0xff; 71 | } else { 72 | *write_head++ = src[i++] + 1; 73 | } 74 | } 75 | if(i < size) { 76 | brc_memcopy_separate(dst, src, size); 77 | *(dst + size) = 0; 78 | return size + BRC_RLT_FOOTER_SIZE; 79 | } else { 80 | size_t packed_size = write_head - dst; 81 | *(dst + packed_size) = 1; 82 | return packed_size + BRC_RLT_FOOTER_SIZE; 83 | } 84 | } 85 | 86 | size_t rlt_reverse(unsigned char * src, unsigned char * dst, size_t size) { 87 | size_t unpacked = size - BRC_RLT_FOOTER_SIZE; 88 | if(*(src + unpacked) == 0) { 89 | brc_memcopy_separate(dst, src, unpacked); 90 | return unpacked; 91 | } else { 92 | unsigned char * write_head = dst; 93 | unsigned char * write_end = write_head + unpacked; 94 | size_t i = 0; 95 | while(i < unpacked) { 96 | if(src[i] == 0xff) { 97 | i++; 98 | *write_head++ = 0xfe + src[i++]; 99 | } else if (src[i] > 1) { 100 | *write_head++ = src[i++] - 1; 101 | } else { 102 | int rle = 1; 103 | while (src[i] <= 1 && i < unpacked) 104 | rle = (rle << 1) | src[i++]; 105 | rle -= 1; 106 | while(rle--) 107 | *write_head++ = 0; 108 | } 109 | } 110 | return write_head - dst; 111 | } 112 | } 113 | 114 | /*** vectorized sorted rank transform ***/ 115 | struct vmtf_s { 116 | unsigned char map[256]; 117 | }; 118 | 119 | inline void init_vmtf(vmtf_s * x) { 120 | for(size_t i = 0; i < 256; i++) 121 | x->map[i] = i; 122 | } 123 | 124 | inline void inverse_vmtf_update_only(vmtf_s * x, unsigned char r, unsigned char s) { 125 | size_t j = 0; 126 | do x->map[j] = x->map[j + 1]; 127 | while(++j < r); 128 | x->map[r] = s; 129 | } 130 | 131 | inline unsigned char inverse_vmtf_update_single(vmtf_s * x, unsigned char r, unsigned char s) { 132 | inverse_vmtf_update_only(x, r, s); 133 | return x->map[0]; 134 | } 135 | 136 | inline void generate_sorted_map(uint32_t * freqs, unsigned char * map) { 137 | uint32_t freqs_cpy[256]; 138 | for(size_t i = 0; i 
< 256; i++) 139 | freqs_cpy[i] = freqs[i]; 140 | 141 | for(size_t j = 0; j < 256; j++) { 142 | size_t max = 0; 143 | unsigned char bsym = 0; 144 | for(int i = 0; i < 256; i++) { 145 | if(freqs_cpy[i] > max) { 146 | bsym = i; 147 | max = freqs_cpy[i]; 148 | } 149 | } 150 | if(max == 0) break; 151 | else map[j] = bsym; 152 | freqs_cpy[bsym] = 0; 153 | } 154 | } 155 | 156 | int vsrc_forwards(unsigned char * src, unsigned char * dst, size_t src_size) { 157 | unsigned char * read_head = src; 158 | unsigned char * write_head = dst; 159 | 160 | vmtf_s state; 161 | init_vmtf(&state); 162 | 163 | size_t bucket[256] = {0}; 164 | uint32_t freqs[256] = {0}; 165 | unsigned char sort_map[256], s, r; 166 | 167 | size_t unique_syms = 0; 168 | for (size_t i = 0; i < src_size; i++) { 169 | s = read_head[i]; 170 | if (freqs[s] == 0) 171 | state.map[s] = unique_syms++; 172 | freqs[s]++; 173 | } 174 | 175 | brc_memcopy_separate(dst + src_size, freqs, BRC_VSRC_FOOTER_SIZE); 176 | generate_sorted_map(freqs, sort_map); 177 | 178 | for(size_t i = 0, bucket_pos = 0; i < unique_syms; i++) { 179 | s = sort_map[i]; 180 | bucket[s] = bucket_pos; 181 | bucket_pos += freqs[s]; 182 | } 183 | 184 | for(size_t i = 0; i < src_size; i++) { 185 | s = read_head[i]; 186 | r = state.map[s]; 187 | write_head[bucket[s]++] = r; 188 | if(r) { 189 | for(size_t i = 0; i < 256; i++) 190 | state.map[i] += (state.map[i] < r); 191 | state.map[s] = 0; 192 | } 193 | } 194 | return src_size + BRC_VSRC_FOOTER_SIZE; 195 | } 196 | 197 | int vsrc_reverse(unsigned char * src, unsigned char * dst, size_t src_size) { 198 | unsigned char * read_head = src; 199 | unsigned char * write_head = dst; 200 | size_t dst_size = src_size - BRC_VSRC_FOOTER_SIZE; 201 | 202 | vmtf_s state; 203 | init_vmtf(&state); 204 | 205 | size_t bucket[256] = {0}, bucket_end[256] = {0}; 206 | uint32_t freqs[256] = {0}; 207 | unsigned char sort_map[256], s, r; 208 | 209 | brc_memcopy_separate(freqs, src + dst_size, BRC_VSRC_FOOTER_SIZE); 210 | 211 | 
size_t total = 0; 212 | for(size_t i = 0; i < 256; i++) 213 | total += freqs[i]; 214 | 215 | if(total != dst_size) 216 | return printf(" Invalid sub header detected! \n"), BRC_EXIT_FAILURE; 217 | 218 | size_t unique_syms = 0; 219 | for(size_t i = 0; i < 256; i++) 220 | if(freqs[i] > 0) 221 | unique_syms++; 222 | 223 | generate_sorted_map(freqs, sort_map); 224 | 225 | for(size_t i = 0, bucket_pos = 0; i < unique_syms; i++) { 226 | s = sort_map[i]; 227 | state.map[read_head[bucket_pos]] = s; 228 | bucket[s] = bucket_pos + 1; 229 | bucket_pos += freqs[s]; 230 | bucket_end[s] = bucket_pos; 231 | } 232 | 233 | s = state.map[0]; 234 | for(size_t i = 0; i < dst_size; i++) { 235 | write_head[i] = s, r = 0xff; 236 | if(bucket[s] < bucket_end[s]) r = read_head[bucket[s]++]; 237 | if(r) s = inverse_vmtf_update_single(&state, r, s); 238 | } 239 | 240 | return dst_size; 241 | } 242 | 243 | /*** BRC TRANSFORM ***/ 244 | size_t brc_safe_memory_bound(size_t x) { 245 | return x + BRC_VSRC_FOOTER_SIZE + BRC_RLT_FOOTER_SIZE + BRC_PAD_SIZE; 246 | } 247 | 248 | int brc_init_cxt(brc_cxt_s * brc_cxt, size_t src_size) { 249 | size_t mempool = brc_safe_memory_bound(src_size); 250 | brc_cxt->block = (unsigned char*)brc_aligned_malloc(mempool, 8); 251 | if(brc_cxt->block == NULL) return BRC_EXIT_FAILURE; 252 | brc_cxt->eob = mempool; 253 | return BRC_EXIT_SUCCESS; 254 | } 255 | 256 | void brc_free_cxt(brc_cxt_s * brc_cxt) { 257 | brc_aligned_free(brc_cxt->block); 258 | brc_cxt->block = NULL; 259 | brc_cxt->size = 0; 260 | brc_cxt->eob = 0; 261 | } 262 | 263 | int brc_encode(brc_cxt_s * brc_cxt, unsigned char * src, size_t src_size) { 264 | int dst_size = vsrc_forwards(src, brc_cxt->block, src_size); 265 | if(dst_size == BRC_EXIT_FAILURE) return BRC_EXIT_FAILURE; 266 | 267 | unsigned char * swap = (unsigned char*)brc_aligned_malloc(brc_safe_memory_bound(dst_size), 8); 268 | if(swap == NULL) return BRC_EXIT_FAILURE; 269 | brc_memcopy_separate(swap, brc_cxt->block, dst_size); 270 | 271 | size_t 
packed_size = rlt_forwards(swap, brc_cxt->block, dst_size); 272 | 273 | brc_cxt->size = (size_t)packed_size; 274 | brc_aligned_free(swap); 275 | 276 | return BRC_EXIT_SUCCESS; 277 | } 278 | 279 | int brc_decode(brc_cxt_s * brc_cxt, unsigned char * dst, size_t * dst_size) { 280 | unsigned char * swap = (unsigned char*)brc_aligned_malloc(brc_safe_memory_bound(brc_cxt->size), 8); 281 | if(swap == NULL) return BRC_EXIT_FAILURE; 282 | brc_memcopy_separate(swap, brc_cxt->block, brc_cxt->size); 283 | 284 | size_t origin_size = rlt_reverse(swap, brc_cxt->block, brc_cxt->size); 285 | 286 | brc_cxt->size = (size_t)origin_size; 287 | brc_aligned_free(swap); 288 | 289 | origin_size = vsrc_reverse(brc_cxt->block, dst, brc_cxt->size); 290 | if(origin_size == BRC_EXIT_FAILURE) return BRC_EXIT_FAILURE; 291 | *dst_size = (size_t)origin_size; 292 | 293 | return BRC_EXIT_SUCCESS; 294 | } 295 | -------------------------------------------------------------------------------- /brc.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | BRC - Behemoth Rank Coding for BWT 3 | MIT License 4 | Copyright (c) 2018 Lucas Marsh 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | */ 9 | #pragma once 10 | 11 | #include "common.hpp" 12 | 13 | #define BRC_VERSION 4 14 | #define BRC_EXIT_SUCCESS 0 15 | #define BRC_EXIT_FAILURE -1 16 | 17 | /* working state shared by the encoder and the decoder */ struct brc_cxt_s { 18 | unsigned char * block; /* working buffer allocated by brc_init_cxt; holds the encoded block */ 19 | size_t size; /* number of valid bytes in 'block'; set by brc_encode, read and then overwritten by brc_decode */ 20 | size_t eob; /* capacity of 'block' in bytes, set by brc_init_cxt */ 21 | }; 22 | 23 | /* allocate memory for BRC encoder or decoder, sized for inputs of up to 'src_size' bytes; returns 0 on success, else -1 when allocation fails */ 24 | int brc_init_cxt(brc_cxt_s * brc_cxt, size_t src_size); 25 | 26 | /* free all memory associated with brc and reset the fields of 'brc_cxt' */ 27 | void brc_free_cxt(brc_cxt_s * brc_cxt); 28 | 29 | /* generates BRC block transform from 'src' and stores it in 'brc_cxt' (data in 'block', length in 'size'); returns 0 for successful encode, else -1 */ 30 | int brc_encode(brc_cxt_s * brc_cxt, unsigned char * src, size_t src_size); 31 | 32 | /* undoes BRC block transform from 'brc_cxt' and stores it in 'dst', writing the decoded length to '*dst_size'; returns 0 for successful decode, else -1 */ 33 | int brc_decode(brc_cxt_s * brc_cxt, unsigned char * dst, size_t * dst_size); 34 | -------------------------------------------------------------------------------- /common.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | BRC - Behemoth Rank Coding for BWT 3 | MIT License 4 | Copyright (c) 2018 Lucas Marsh 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | */ 9 | #include "brc.hpp" 10 | #include "common.hpp" 11 | 12 | #define BUFFER_SIZE (1 << 20) 13 | 14 | int encode_stream_serial(FILE * f_input, FILE * f_output) { 15 | unsigned char * buffer = (unsigned char*)malloc(BUFFER_SIZE); 16 | memset(buffer, 0, BUFFER_SIZE); 17 | if(!buffer) 18 | return printf(" Failed to allocate input! \n"), EXIT_FAILURE; 19 | 20 | brc_cxt_s brc_cxt; 21 | brc_init_cxt(&brc_cxt, BUFFER_SIZE); 22 | 23 | time_t start; 24 | double cpu_time = 0; 25 | 26 | size_t bytes_read; 27 | size_t total_bytes_read = 0; 28 | size_t total_bytes_written = 0; 29 | while((bytes_read = fread(buffer, 1, BUFFER_SIZE, f_input)) > 0) { 30 | total_bytes_read += bytes_read; 31 | start = clock(); 32 | 33 | if(brc_encode(&brc_cxt, buffer, bytes_read) == BRC_EXIT_FAILURE) 34 | return printf(" Failed to encode input! 
\n"), EXIT_FAILURE; 35 | 36 | cpu_time += ((double)clock() - (double)start) / CLOCKS_PER_SEC; 37 | fwrite(&brc_cxt.size, 1, sizeof(brc_cxt.size), f_output); 38 | fwrite(brc_cxt.block, 1, brc_cxt.size, f_output); 39 | total_bytes_written += brc_cxt.size; 40 | 41 | printf(" read %llu MB => %llu MB, cpu time = %.3f seconds, throughput = %.3f MB/s \r", 42 | (long long)(total_bytes_read / 1000000), 43 | (long long)(total_bytes_written / 1000000), 44 | cpu_time, 45 | ((double)total_bytes_read / 1000000.f) / cpu_time 46 | ); 47 | } 48 | 49 | printf(" read %llu MB => %llu MB, cpu time = %.3f seconds, throughput = %.3f MB/s \n", 50 | (long long)(total_bytes_read / 1000000), 51 | (long long)(total_bytes_written / 1000000), 52 | cpu_time, 53 | ((double)total_bytes_read / 1000000.f) / cpu_time 54 | ); 55 | 56 | free(buffer); 57 | brc_free_cxt(&brc_cxt); 58 | return EXIT_SUCCESS; 59 | } 60 | 61 | int decode_stream_serial(FILE * f_input, FILE * f_output) { 62 | unsigned char * buffer = (unsigned char*)malloc(BUFFER_SIZE); 63 | memset(buffer, 0, BUFFER_SIZE); 64 | if(!buffer) 65 | return printf(" Failed to allocate output! \n"), EXIT_FAILURE; 66 | 67 | brc_cxt_s brc_cxt; 68 | brc_init_cxt(&brc_cxt, BUFFER_SIZE); 69 | 70 | time_t start; 71 | double cpu_time = 0; 72 | 73 | size_t bytes_read; 74 | size_t total_bytes_read = 0; 75 | size_t total_bytes_written = 0; 76 | while((bytes_read = fread(&brc_cxt.size, 1, sizeof(brc_cxt.size), f_input)) > 0) { 77 | total_bytes_read += bytes_read; 78 | 79 | if(brc_cxt.size > brc_cxt.eob) 80 | return printf(" Read invalid data! \n"), EXIT_FAILURE; 81 | 82 | bytes_read = fread(brc_cxt.block, 1, brc_cxt.size, f_input); 83 | total_bytes_read += bytes_read; 84 | 85 | size_t original_size; 86 | start = clock(); 87 | 88 | if(brc_decode(&brc_cxt, buffer, &original_size) == BRC_EXIT_FAILURE) 89 | return printf(" Failed to decode input! 
\n"), EXIT_FAILURE; 90 | 91 | cpu_time += ((double)clock() - (double)start) / CLOCKS_PER_SEC; 92 | 93 | 94 | fwrite(buffer, 1, original_size, f_output); 95 | total_bytes_written += original_size; 96 | 97 | printf(" read %llu MB => %llu MB, cpu time = %.3f seconds, throughput = %.3f MB/s \r", 98 | (long long)(total_bytes_read / 1000000), 99 | (long long)(total_bytes_written / 1000000), 100 | cpu_time, 101 | ((double)total_bytes_written / 1000000.f) / cpu_time 102 | ); 103 | } 104 | 105 | printf(" read %llu MB => %llu MB, cpu time = %.3f seconds, throughput = %.3f MB/s \n", 106 | (long long)(total_bytes_read / 1000000), 107 | (long long)(total_bytes_written / 1000000), 108 | cpu_time, 109 | ((double)total_bytes_written / 1000000.f) / cpu_time 110 | ); 111 | 112 | free(buffer); 113 | brc_free_cxt(&brc_cxt); 114 | return EXIT_SUCCESS; 115 | } 116 | 117 | int encode_stream_parallel(FILE * f_input, FILE * f_output, int num_threads) { 118 | brc_cxt_s brc_cxt[num_threads]; 119 | unsigned char * buffer[num_threads]; 120 | 121 | for(size_t t = 0; t < num_threads; t++) { 122 | if(brc_init_cxt(&brc_cxt[t], BUFFER_SIZE) == BRC_EXIT_FAILURE) 123 | return printf(" Failed to allocate brc cxt! \n"), EXIT_FAILURE; 124 | buffer[t] = (unsigned char*)malloc(BUFFER_SIZE); 125 | memset(buffer[t], 0, BUFFER_SIZE); 126 | if(!buffer[t]) 127 | return printf(" Failed to allocate output! 
\n"), EXIT_FAILURE; 128 | } 129 | 130 | time_t start; 131 | double cpu_time = 0; 132 | size_t bytes_read[num_threads] = {0}; 133 | size_t total_bytes_read = 0, total_bytes_written = 0; 134 | int errs[num_threads] = {0}; 135 | 136 | while(1) { 137 | int s = 0; 138 | bool end = false; 139 | for(; s < num_threads; s++) { 140 | bytes_read[s] = fread(buffer[s], 1, BUFFER_SIZE, f_input); 141 | if(bytes_read[s] == 0) { 142 | end = true; 143 | break; 144 | } 145 | } 146 | 147 | start = clock(); 148 | 149 | #pragma omp parallel for num_threads(s) 150 | for(int i = 0; i < s; i++) { 151 | errs[i] = brc_encode(&brc_cxt[i], buffer[i], bytes_read[i]); 152 | } 153 | 154 | cpu_time += ((double)clock() - (double)start) / CLOCKS_PER_SEC; 155 | 156 | for(int i = 0; i < s; i++) { 157 | total_bytes_read += bytes_read[i]; 158 | total_bytes_written += brc_cxt[i].size + sizeof(brc_cxt[s].size); 159 | fwrite(&brc_cxt[i].size, 1, sizeof(brc_cxt[i].size), f_output); 160 | fwrite(brc_cxt[i].block, 1, brc_cxt[i].size, f_output); 161 | } 162 | 163 | printf(" read %llu MB => %llu MB, cpu time = %.3f seconds, throughput = %.3f MB/s \r", 164 | (long long)(total_bytes_read / 1000000), 165 | (long long)(total_bytes_written / 1000000), 166 | cpu_time, 167 | ((double)total_bytes_read / 1000000.f) / cpu_time 168 | ); 169 | 170 | 171 | if(end) break; 172 | } 173 | 174 | printf(" read %llu MB => %llu MB, cpu time = %.3f seconds, throughput = %.3f MB/s \n", 175 | (long long)(total_bytes_read / 1000000), 176 | (long long)(total_bytes_written / 1000000), 177 | cpu_time, 178 | ((double)total_bytes_read / 1000000.f) / cpu_time 179 | ); 180 | 181 | for(size_t t = 0; t < num_threads; t++) { 182 | free(buffer[t]); 183 | brc_free_cxt(&brc_cxt[t]); 184 | } 185 | return EXIT_SUCCESS; 186 | } 187 | 188 | int decode_stream_parallel(FILE * f_input, FILE * f_output, int num_threads) { 189 | brc_cxt_s brc_cxt[num_threads]; 190 | unsigned char * buffer[num_threads]; 191 | 192 | for(size_t t = 0; t < num_threads; t++) { 
193 | if(brc_init_cxt(&brc_cxt[t], BUFFER_SIZE) == BRC_EXIT_FAILURE) 194 | return printf(" Failed to allocate brc cxt! \n"), EXIT_FAILURE; 195 | buffer[t] = (unsigned char*)malloc(BUFFER_SIZE); 196 | memset(buffer[t], 0, BUFFER_SIZE); 197 | if(!buffer[t]) 198 | return printf(" Failed to allocate output! \n"), EXIT_FAILURE; 199 | } 200 | 201 | time_t start; 202 | double cpu_time = 0; 203 | size_t bytes_read[num_threads] = {0}; 204 | size_t original_sizes[num_threads] = {0}; 205 | size_t total_bytes_read = 0, total_bytes_written = 0; 206 | int errs[num_threads] = {0}; 207 | 208 | while(1) { 209 | int s = 0; 210 | bool end = false; 211 | for(; s < num_threads; s++) { 212 | fread(&brc_cxt[s].size, 1, sizeof(brc_cxt[s].size), f_input); 213 | if(brc_cxt[s].size > brc_cxt[s].eob) return printf(" Invalid input! \n"), EXIT_FAILURE; 214 | bytes_read[s] = fread(brc_cxt[s].block, 1, brc_cxt[s].size, f_input); 215 | if(bytes_read[s] == 0) { 216 | end = true; 217 | break; 218 | } 219 | } 220 | 221 | start = clock(); 222 | 223 | #pragma omp parallel for num_threads(s) 224 | for(int i = 0; i < s; i++) { 225 | errs[i] = brc_decode(&brc_cxt[i], buffer[i], &original_sizes[i]); 226 | } 227 | 228 | cpu_time += ((double)clock() - (double)start) / CLOCKS_PER_SEC; 229 | 230 | for(int i = 0; i < s; i++) { 231 | total_bytes_read += bytes_read[i] + sizeof(brc_cxt[s].size); 232 | total_bytes_written += original_sizes[i]; 233 | fwrite(buffer[i], 1, original_sizes[i], f_output); 234 | } 235 | 236 | printf(" read %llu MB => %llu MB, cpu time = %.3f seconds, throughput = %.3f MB/s \r", 237 | (long long)(total_bytes_read / 1000000), 238 | (long long)(total_bytes_written / 1000000), 239 | cpu_time, 240 | ((double)total_bytes_written / 1000000.f) / cpu_time 241 | ); 242 | 243 | 244 | if(end) break; 245 | } 246 | 247 | printf(" read %llu MB => %llu MB, cpu time = %.3f seconds, throughput = %.3f MB/s \n", 248 | (long long)(total_bytes_read / 1000000), 249 | (long long)(total_bytes_written / 1000000), 
250 | cpu_time, 251 | ((double)total_bytes_written / 1000000.f) / cpu_time 252 | ); 253 | 254 | for(size_t t = 0; t < num_threads; t++) { 255 | free(buffer[t]); 256 | brc_free_cxt(&brc_cxt[t]); 257 | } 258 | return EXIT_SUCCESS; 259 | } 260 | 261 | int main(int argc, char ** argv) { 262 | if(argc < 4) { 263 | printf(" BRC version %i - Behemoth Rank Coding for BWT \n\ 264 | Lucas Marsh (c) 2018, MIT licensed \n\ 265 | Usage: brc.exe input output num-threads\n\ 266 | Arguments: \n\ 267 | c : compress \n\ 268 | d : decompress \n\ 269 | Press 'enter' to continue", BRC_VERSION); 270 | getchar(); 271 | return 0; 272 | } 273 | 274 | if(strcmp(argv[2], argv[3]) == 0) return perror(" Refusing to write to input, change the output directory! \n"), EXIT_FAILURE; 275 | 276 | FILE* f_input = fopen(argv[2], "rb"); 277 | FILE* f_output = fopen(argv[3], "wb"); 278 | if (f_input == NULL) return perror(argv[2]), 1; 279 | if (f_output == NULL) return perror(argv[3]), 1; 280 | 281 | int num_threads = 4; 282 | if(argc > 4) num_threads = atoi(argv[4]); 283 | 284 | switch(argv[1][0]) { 285 | case 'c': { 286 | if(num_threads > 1) { 287 | if(encode_stream_parallel(f_input, f_output, num_threads) != EXIT_SUCCESS) 288 | return printf(" Encoding failed! \n"), EXIT_FAILURE; 289 | } else { 290 | if(encode_stream_serial(f_input, f_output) != EXIT_SUCCESS) 291 | return printf(" Encoding failed! \n"), EXIT_FAILURE; 292 | } 293 | } break; 294 | case 'd': { 295 | if(num_threads > 1) { 296 | if(decode_stream_parallel(f_input, f_output, num_threads) != EXIT_SUCCESS) 297 | return printf(" Decoding failed! \n"), EXIT_FAILURE; 298 | } else { 299 | if(decode_stream_serial(f_input, f_output) != EXIT_SUCCESS) 300 | return printf(" Decoding failed! 
\n"), EXIT_FAILURE; 301 | } 302 | } break; 303 | default: printf(" Invalid argument!\n"); 304 | } 305 | 306 | fclose(f_input); 307 | fclose(f_output); 308 | return EXIT_SUCCESS; 309 | } 310 | -------------------------------------------------------------------------------- /make_avx.bat: -------------------------------------------------------------------------------- 1 | g++ -std=c++11 -Ofast -s -static -fopenmp -funroll-loops -ftree-vectorize -mavx main.cpp brc.cpp -o brc_avx 2 | PAUSE -------------------------------------------------------------------------------- /make_sse2.bat: -------------------------------------------------------------------------------- 1 | g++ -std=c++11 -Ofast -s -static -fopenmp -funroll-loops -ftree-vectorize -msse2 main.cpp brc.cpp -o brc_sse2 2 | PAUSE -------------------------------------------------------------------------------- /make_std.bat: -------------------------------------------------------------------------------- 1 | g++ -std=c++11 -Ofast -s -static -fopenmp -funroll-loops -ftree-vectorize main.cpp brc.cpp -o brc_std 2 | PAUSE --------------------------------------------------------------------------------